├── .gitignore ├── proxies.txt ├── src ├── recaptcha_task.py ├── main.py ├── utils.py ├── image_handler.py └── recaptcha_solver.py ├── .env ├── requirements.txt ├── tests └── test_proxy_formatter.py ├── LICENSE ├── Dockerfile └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /proxies.txt: -------------------------------------------------------------------------------- 1 | ip:port 2 | ip:port:user:pass -------------------------------------------------------------------------------- /src/recaptcha_task.py: --------------------------------------------------------------------------------
# Holds the state of the challenge currently being solved. RecaptchaSolver
# creates one instance and overwrites both fields as it inspects the page.
class RecaptchaTask:
    # URL of the challenge's image grid; empty until a grid has been found.
    image_grid_url = ""
    # Object name taken from the challenge prompt (a key of
    # image_types_conversions); empty until a challenge has been read.
    desired_image_type = ""
-------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID_HERE 2 | AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY_HERE 3 | AWS_REGION=YOUR_REGION_HERE -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | image-slicer==2.1.1 2 | selenium==3.141.0 3 | loguru==0.5.3 4 | requests==2.22.0 5 | boto3==1.17.44 6 | python-dotenv==0.17.0 -------------------------------------------------------------------------------- /src/main.py: --------------------------------------------------------------------------------
# Entry point: solves the reCAPTCHA on Google's public demo page.
from recaptcha_solver import RecaptchaSolver
import signal


if __name__ == "__main__":
    # Use the OS default SIGINT action so Ctrl+C ends the process immediately
    # instead of raising KeyboardInterrupt inside the solver loop.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    # Proxies are enabled by default; see RecaptchaSolver's use_proxies flag.
    rcs = RecaptchaSolver("https://www.google.com/recaptcha/api2/demo")
    recaptcha_token = rcs.solve()
-------------------------------------------------------------------------------- /tests/test_proxy_formatter.py: 
-------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | class ProxyTest(unittest.TestCase): 5 | def test_proxy_formatter(self): 6 | proxy = "127.0.0.1:8888:nate:test" 7 | proxy_split = proxy.split(":") 8 | if len(proxy_split) == 2: 9 | formatted_proxy = { 10 | "ip": proxy_split[0], 11 | "port": int(proxy_split[1]) 12 | } 13 | else: 14 | formatted_proxy = { 15 | "ip": proxy_split[0], 16 | "port": int(proxy_split[1]), 17 | "user": proxy_split[2], 18 | "pass": proxy_split[3] 19 | } 20 | self.assertEqual(formatted_proxy["ip"], "127.0.0.1") 21 | self.assertEqual(formatted_proxy["port"], 8888) 22 | self.assertEqual(formatted_proxy["user"], "nate") 23 | self.assertEqual(formatted_proxy["pass"], "test") 24 | 25 | 26 | if __name__ == "__main__": 27 | unittest.main() -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | import random 3 | import time 4 | import os 5 | 6 | def sleep_random(min_time, max_time): 7 | delay = random.uniform(min_time, max_time) 8 | logger.debug(f"Sleeping for {delay} seconds") 9 | time.sleep(delay) 10 | 11 | def load_proxy(): 12 | with open(os.path.join(os.getcwd(), "proxies.txt"), "r") as file: 13 | lines = file.read().splitlines() 14 | if len(lines) == 0: 15 | raise Exception("No proxies found in proxies.txt") 16 | proxy_split = random.choice(lines).split(":") 17 | if len(proxy_split) == 2: 18 | return { 19 | "ip": proxy_split[0], 20 | "port": int(proxy_split[1]) 21 | } 22 | else: 23 | return { 24 | "ip": proxy_split[0], 25 | "port": int(proxy_split[1]), 26 | "user": proxy_split[2], 27 | "pass": proxy_split[3] 28 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2021 Nate Wong 3 | 4 | 
Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | 3 | WORKDIR /app 4 | 5 | COPY .env .env 6 | COPY requirements.txt requirements.txt 7 | COPY proxies.txt proxies.txt 8 | COPY ./src . 
9 | RUN pip3 install -r requirements.txt 10 | 11 | RUN apt-get update && apt-get install -y \ 12 | fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \ 13 | libnspr4 libnss3 lsb-release xdg-utils libxss1 libdbus-glib-1-2 \ 14 | curl unzip wget \ 15 | xvfb 16 | 17 | RUN GECKODRIVER_VERSION=`curl https://github.com/mozilla/geckodriver/releases/latest | grep -Po 'v[0-9]+.[0-9]+.[0-9]+'` && \ 18 | wget https://github.com/mozilla/geckodriver/releases/download/$GECKODRIVER_VERSION/geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz && \ 19 | tar -zxf geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz -C /usr/local/bin && \ 20 | chmod +x /usr/local/bin/geckodriver && \ 21 | rm geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz 22 | 23 | RUN FIREFOX_SETUP=firefox-setup.tar.bz2 && \ 24 | apt-get purge firefox && \ 25 | wget -O $FIREFOX_SETUP "https://download.mozilla.org/?product=firefox-latest&os=linux64" && \ 26 | tar xjf $FIREFOX_SETUP -C /opt/ && \ 27 | ln -s /opt/firefox/firefox /usr/bin/firefox && \ 28 | rm $FIREFOX_SETUP 29 | 30 | CMD ["python3", "main.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recaptcha Fullauto 2 | I've decided to open source my old Recaptcha v2 solver. My latest version will be open sourced this summer. I am hoping this project will serve as inspiration for others to build a solver from, as plugging in an existing model is not too difficult to do.

3 | This project uses the AWS [Rekognition](https://aws.amazon.com/rekognition/) API, which is a decent solution but isn't the best. If you are looking to build off this project, I would take a look at [this repo](https://github.com/haze/nocap) for inspiration on building and training your own model. I would also recommend using [this repo](https://github.com/deathlyface/recaptcha-dataset) as a base for your dataset or scraping images from Google Images using [this project](https://pypi.org/project/icrawler/).
4 | 5 | 6 | https://user-images.githubusercontent.com/39974384/115634145-ff9b4900-a2d6-11eb-972c-838389a5c5f6.mp4 7 | 8 | 9 | ## Requirements 10 | * Python 3 11 | * Firefox 12 | * Geckodriver (make sure it is installed in your PATH) 13 | * AWS credentials (create a new IAM user with the AmazonRekognitionFullAccess policy attached) 14 | ## Installation 15 | ### Download The Project 16 | ```bash 17 | git clone https://github.com/natewong1313/recaptcha-fullauto.git 18 | ``` 19 | ```bash 20 | cd recaptcha-fullauto 21 | ``` 22 | ### Configuration 23 | Add your proxies in the proxies.txt file and add your AWS credentials in the .env file
24 | If you don't want to use proxies, modify the src/main.py file as such 25 | ```python 26 | rcs = RecaptchaSolver("https://www.google.com/recaptcha/api2/demo", use_proxies = False) 27 | ``` 28 | ### Using Docker 29 | ```bash 30 | docker build -t recaptcha-fullauto . 31 | ``` 32 | ```bash 33 | docker run recaptcha-fullauto 34 | ``` 35 | ### From Source 36 | ```bash 37 | pip install -r requirements.txt 38 | ``` 39 | ```bash 40 | python src/main.py 41 | ``` 42 | -------------------------------------------------------------------------------- /src/image_handler.py: -------------------------------------------------------------------------------- 1 | from image_slicer import slice as image_slice 2 | from dotenv import load_dotenv 3 | import threading 4 | import requests 5 | import boto3 6 | import glob 7 | import os 8 | 9 | load_dotenv() 10 | 11 | image_types_conversions = { 12 | "crosswalks": "Zebra Crossing", 13 | "a fire hydrant": "Fire Hydrant", 14 | "cars": "Vehicle", 15 | "bicycles": "Bicycle", 16 | "bus": "Bus", 17 | "chimneys": "Roof", 18 | "traffic lights": "Traffic Light", 19 | "parking meters": "Parking Meter", 20 | "boats": "Boat", 21 | "motorcycles": "Motorcycle", 22 | "mountains or hills": "Landscape", 23 | "tractors": "Tractor", 24 | "taxis": "Taxi" 25 | } 26 | 27 | class ImageHandler: 28 | def __init__(self): 29 | if not os.path.exists("images"): 30 | os.makedirs("images") 31 | 32 | self.aws_rekognition_client = boto3.client("rekognition", aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID"), 33 | aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY"), region_name = os.getenv("AWS_REGION")) 34 | files = glob.glob(os.path.join(os.getcwd(), "images\\*")) 35 | for f in files: 36 | os.remove(f) 37 | 38 | def process_grid(self, image_grid_url, desired_image_type): 39 | self.save_image(image_grid_url, "images/captcha_grid.jpg", is_grid = True) 40 | 41 | image_worker_threads = [] 42 | self.results = [] 43 | for x in range(3): 44 | for y in range(3): 45 | 
index = (x*3) + y 46 | t = threading.Thread(target = self.process_image, args = (f"images/captcha_grid_0{x+1}_0{y+1}.png", desired_image_type, index)) 47 | t.start() 48 | image_worker_threads.append(t) 49 | 50 | for t in image_worker_threads: 51 | t.join() 52 | 53 | return self.results 54 | 55 | def process_new_images(self, images_urls, desired_image_type): 56 | image_worker_threads = [] 57 | self.results = [] 58 | for i, image_url in enumerate(images_urls): 59 | self.save_image(image_url, f"images/captcha_img{i}.jpg") 60 | t = threading.Thread(target = self.process_image, args = (f"images/captcha_img{i}.jpg", desired_image_type, i)) 61 | t.start() 62 | image_worker_threads.append(t) 63 | 64 | for t in image_worker_threads: 65 | t.join() 66 | 67 | new_images_results = [] 68 | for i, image_url in enumerate(images_urls): 69 | new_images_results.append({"image_url": image_url, "matches": i in self.results}) 70 | 71 | return new_images_results 72 | 73 | 74 | def save_image(self, image_url, path_name, is_grid = False): 75 | r = requests.get(image_url, stream = True) 76 | if r.status_code == 200: 77 | with open(path_name, "wb") as f: 78 | for chunk in r: 79 | f.write(chunk) 80 | if is_grid: 81 | image_slice(path_name, 9) 82 | else: 83 | raise Exception(f"Unknown status code from image url: {r.status_code}") 84 | 85 | def process_image(self, image_path, desired_image_type, index): 86 | with open(os.path.join(os.getcwd(), image_path), "rb") as image: 87 | response = self.aws_rekognition_client.detect_labels(Image = {"Bytes": image.read()}) 88 | for label in response["Labels"]: 89 | if image_types_conversions[desired_image_type] == label["Name"]: 90 | self.results.append(index) -------------------------------------------------------------------------------- /src/recaptcha_solver.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import MoveTargetOutOfBoundsException, 
NoSuchElementException, WebDriverException 3 | from selenium.webdriver.common.action_chains import ActionChains 4 | from selenium.webdriver.support import expected_conditions as EC 5 | from selenium.webdriver.support.ui import WebDriverWait 6 | from selenium.webdriver.firefox.options import Options 7 | from selenium.webdriver.common.by import By 8 | from base64 import b64encode 9 | from loguru import logger 10 | import random 11 | import time 12 | import os 13 | from utils import sleep_random, load_proxy 14 | from recaptcha_task import RecaptchaTask 15 | from image_handler import ImageHandler, image_types_conversions 16 | 17 | class RecaptchaSolver: 18 | def __init__(self, solve_url, use_proxies = True, headless = False): 19 | options = Options() 20 | options.headless = headless 21 | 22 | profile = webdriver.FirefoxProfile() 23 | if use_proxies: 24 | proxy = load_proxy() 25 | profile.set_preference("network.proxy.type", 1) 26 | profile.set_preference("network.proxy.http", proxy["ip"]) 27 | profile.set_preference("network.proxy.http_port", proxy["port"]) 28 | if "username" in proxy: 29 | credentials = b64encode(f'{proxy["username"]}:{proxy["password"]}'.encode("ascii")).decode() 30 | profile.set_preference("extensions.closeproxyauth.authtoken", credentials) 31 | 32 | profile.set_preference("dom.webdriver.enabled", False) 33 | profile.set_preference("useAutomationExtension", False) 34 | profile.update_preferences() 35 | 36 | try: 37 | self.driver = webdriver.Firefox(firefox_profile = profile, options = options) 38 | except WebDriverException: 39 | options.headless = True 40 | self.driver = webdriver.Firefox(firefox_profile = profile, options = options) 41 | 42 | self.image_handler = ImageHandler() 43 | self.solve_url = solve_url 44 | self.recaptcha_task = RecaptchaTask() 45 | 46 | def solve(self): 47 | self.load_captcha_url() 48 | self.switch_to_recap_iframe() 49 | self.trigger_captcha() 50 | self.switch_to_challenge_iframe() 51 | while True: 52 | 
self.check_challenge_type() 53 | self.find_image_grid() 54 | self.solve_image_grid() 55 | self.solve_new_images() 56 | success = self.verify_challenge() 57 | if success: 58 | recaptcha_token = self.get_recaptcha_token() 59 | self.driver.quit() 60 | return recaptcha_token 61 | 62 | def load_captcha_url(self): 63 | logger.debug(f"Load url: {self.solve_url}") 64 | self.driver.get(self.solve_url) 65 | 66 | def switch_to_recap_iframe(self): 67 | logger.debug("Searching for recaptcha iframe") 68 | recaptcha_iframe = WebDriverWait(self.driver, 25).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'iframe[title="reCAPTCHA"]'))) 69 | logger.debug("Found iframe, switching to it...") 70 | self.driver.switch_to.frame(recaptcha_iframe.get_attribute("name")) 71 | 72 | def trigger_captcha(self): 73 | logger.debug("Searching for recaptcha checkbox") 74 | recaptcha_checkbox = WebDriverWait(self.driver, 25).until(EC.presence_of_element_located((By.CLASS_NAME, "recaptcha-checkbox"))) 75 | logger.debug("Found recaptcha checkbox, delaying before click...") 76 | sleep_random(1.0, 3.0) 77 | 78 | ActionChains(self.driver).move_to_element(recaptcha_checkbox).perform() 79 | recaptcha_checkbox.click() 80 | 81 | def switch_to_challenge_iframe(self): 82 | logger.debug("Switching back to parent frame") 83 | self.driver.switch_to.parent_frame() 84 | logger.debug("Searching for challenge iframe") 85 | recaptcha_challenge_iframe = WebDriverWait(self.driver, 25).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'iframe[title="recaptcha challenge"]'))) 86 | logger.debug("Found challenge iframe, switching to it...") 87 | self.driver.switch_to.frame(recaptcha_challenge_iframe.get_attribute("name")) 88 | 89 | def check_challenge_type(self): 90 | while True: 91 | class_name = "rc-imageselect-desc-no-canonical" 92 | try: 93 | captcha_type = self.driver.find_element_by_class_name("rc-imageselect-desc-no-canonical").get_attribute("textContent") 94 | except NoSuchElementException: 95 | 
captcha_type = self.driver.find_element_by_class_name("rc-imageselect-desc").get_attribute("textContent") 96 | class_name = "rc-imageselect-desc" 97 | 98 | if "Select all squares with" in captcha_type: 99 | logger.debug("Fetching new challenge...") 100 | self.reload_captcha() 101 | continue 102 | elif "Select all images with" in captcha_type: 103 | desired_image_type = self.driver.find_element_by_class_name(class_name).find_element_by_tag_name("strong").get_attribute("textContent") 104 | if desired_image_type in image_types_conversions: 105 | logger.debug(f"Challenge type found: {desired_image_type}") 106 | self.recaptcha_task.desired_image_type = desired_image_type 107 | return 108 | else: 109 | logger.error(f"Unknown challenge type found ({desired_image_type}), reloading...") 110 | self.reload_captcha() 111 | continue 112 | else: 113 | raise Exception("Unknown challenge type") 114 | 115 | def find_image_grid(self): 116 | logger.debug("Searching for image grid") 117 | image_grid_url = self.driver.find_element_by_class_name("rc-image-tile-wrapper").find_element_by_tag_name("img").get_attribute("src") 118 | logger.debug(f"Found image grid: {image_grid_url}") 119 | self.recaptcha_task.image_grid_url = image_grid_url 120 | 121 | def solve_image_grid(self): 122 | while True: 123 | logger.debug("Processing images in grid") 124 | results = self.image_handler.process_grid(self.recaptcha_task.image_grid_url, self.recaptcha_task.desired_image_type) 125 | if len(results) == 0: 126 | logger.error("Failed to identify images, reloading") 127 | self.reload_captcha() 128 | time.sleep(1) 129 | continue 130 | for index in results: 131 | self.click_image_grid_elem(index) 132 | return 133 | 134 | def click_image_grid_elem(self, index): 135 | image_element = self.driver.find_elements_by_class_name("rc-image-tile-target")[index] 136 | ActionChains(self.driver).move_to_element(image_element).perform() 137 | image_element.click() 138 | 139 | def solve_new_images(self): 140 | while True: 
141 | logger.debug("Sleeping before checking new images") 142 | time.sleep(5) 143 | logger.debug("Processing new images") 144 | new_images = self.driver.find_elements_by_class_name("rc-image-tile-11") 145 | new_images_urls = [new_image.get_attribute("src") for new_image in new_images] 146 | 147 | results = self.image_handler.process_new_images(new_images_urls, self.recaptcha_task.desired_image_type) 148 | for i, result in enumerate(results): 149 | if result["matches"]: 150 | self.click_new_image_elem(i) 151 | 152 | if len([result for result in results if result["matches"]]) == 0 or len([result for result in results if not result["matches"]]) == len(results): 153 | logger.debug("All new images solved, proceeding") 154 | return 155 | 156 | def get_element_index(self, image_url, new_images_urls): 157 | for i, new_image_url in enumerate(new_images_urls): 158 | if new_image_url == image_url: 159 | return i 160 | 161 | def click_new_image_elem(self, index): 162 | image_element = self.driver.find_elements_by_class_name("rc-image-tile-11")[index].find_element_by_xpath("..") 163 | ActionChains(self.driver).move_to_element(image_element).perform() 164 | image_element.click() 165 | 166 | def verify_challenge(self): 167 | logger.debug("Verifying challenge solution") 168 | self.driver.find_element_by_id("recaptcha-verify-button").click() 169 | time.sleep(1) 170 | 171 | self.driver.switch_to.parent_frame() 172 | self.switch_to_recap_iframe() 173 | try: 174 | self.driver.find_element_by_class_name("recaptcha-checkbox-checked") 175 | logger.success("Successfully solved challenge") 176 | return True 177 | except NoSuchElementException: 178 | logger.error("Failed to solve challenge, retrying") 179 | self.switch_to_challenge_iframe() 180 | if self.driver.find_element_by_class_name("rc-imageselect-incorrect-response").get_attribute("style") != "": 181 | self.reload_captcha() 182 | return False 183 | 184 | def get_recaptcha_token(self): 185 | logger.debug("Searching for recaptcha 
token") 186 | self.driver.switch_to.parent_frame() 187 | recaptcha_token = self.driver.find_element_by_id("g-recaptcha-response").get_attribute("value") 188 | logger.debug(f"Found recaptcha token: {recaptcha_token}") 189 | return recaptcha_token 190 | 191 | def reload_captcha(self): 192 | old_val = self.driver.find_element_by_id("recaptcha-token").get_attribute("value") 193 | self.driver.find_element_by_id("recaptcha-reload-button").click() 194 | while True: 195 | if self.driver.find_element_by_id("recaptcha-token").get_attribute("value") != old_val: 196 | return 197 | time.sleep(0.01) --------------------------------------------------------------------------------