├── .gitignore
├── proxies.txt
├── src
    ├── recaptcha_task.py
    ├── main.py
    ├── utils.py
    ├── image_handler.py
    └── recaptcha_solver.py
├── .env
├── requirements.txt
├── tests
    └── test_proxy_formatter.py
├── LICENSE
├── Dockerfile
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/


--------------------------------------------------------------------------------
/proxies.txt:
--------------------------------------------------------------------------------
1 | ip:port
2 | ip:port:user:pass


--------------------------------------------------------------------------------
/src/recaptcha_task.py:
--------------------------------------------------------------------------------
class RecaptchaTask:
    """Mutable record describing the challenge currently being solved.

    Both fields default to empty strings at class level and are overwritten
    on the instance once the corresponding value has been read from the page.
    """

    # URL of the challenge's image-grid picture.
    image_grid_url: str = ""
    # Target text taken from the challenge prompt (for example "cars").
    desired_image_type: str = ""


--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID_HERE
2 | AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY_HERE
3 | AWS_REGION=YOUR_REGION_HERE


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | image-slicer==2.1.1
2 | selenium==3.141.0
3 | loguru==0.5.3
4 | requests==2.22.0
5 | boto3==1.17.44
6 | python-dotenv==0.17.0


--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | from recaptcha_solver import RecaptchaSolver
2 | import signal
3 | 
4 | 
if __name__ == "__main__":
    # Restore the default SIGINT action so Ctrl+C ends the process right away
    # instead of raising KeyboardInterrupt inside a Selenium call.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    demo_url = "https://www.google.com/recaptcha/api2/demo"
    solver = RecaptchaSolver(demo_url)
    recaptcha_token = solver.solve()


--------------------------------------------------------------------------------
/tests/test_proxy_formatter.py:
--------------------------------------------------------------------------------
 1 |   
 2 | import unittest
 3 | 
 4 | class ProxyTest(unittest.TestCase):
 5 |     def test_proxy_formatter(self):
 6 |         proxy = "127.0.0.1:8888:nate:test"
 7 |         proxy_split = proxy.split(":")
 8 |         if len(proxy_split) == 2:
 9 |             formatted_proxy = {
10 |                 "ip": proxy_split[0], 
11 |                 "port": int(proxy_split[1])
12 |             }
13 |         else:
14 |             formatted_proxy = {
15 |                 "ip": proxy_split[0], 
16 |                 "port": int(proxy_split[1]),
17 |                 "user": proxy_split[2], 
18 |                 "pass": proxy_split[3]
19 |             }
20 |         self.assertEqual(formatted_proxy["ip"], "127.0.0.1")
21 |         self.assertEqual(formatted_proxy["port"], 8888)
22 |         self.assertEqual(formatted_proxy["user"], "nate")
23 |         self.assertEqual(formatted_proxy["pass"], "test")
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     unittest.main() 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
 1 | from loguru import logger
 2 | import random
 3 | import time
 4 | import os
 5 | 
def sleep_random(min_time: float, max_time: float) -> None:
    """Block the calling thread for a randomly chosen duration.

    The delay is drawn with ``random.uniform(min_time, max_time)`` and logged
    at debug level before sleeping.

    Args:
        min_time: One bound of the delay range, in seconds.
        max_time: The other bound of the delay range, in seconds.
    """
    delay = random.uniform(min_time, max_time)
    logger.debug(f"Sleeping for {delay} seconds")
    time.sleep(delay)
10 | 
def load_proxy():
    """Pick a random proxy from ``proxies.txt`` in the current working directory.

    Every non-blank line must have the form ``ip:port`` or
    ``ip:port:user:pass``.  Blank lines (for example the trailing newline
    most editors add) are ignored so they can never be selected.

    Returns:
        dict: ``{"ip": str, "port": int}`` and, when the line carries
        credentials, additionally the keys ``"user"`` and ``"pass"``.

    Raises:
        Exception: If the file contains no proxy entries.
        ValueError: If the selected line does not have two or four fields,
            or its port is not an integer.
    """
    proxies_path = os.path.join(os.getcwd(), "proxies.txt")
    with open(proxies_path, "r") as file:
        lines = [line.strip() for line in file.read().splitlines()]
    lines = [line for line in lines if line]
    if not lines:
        raise Exception("No proxies found in proxies.txt")

    # maxsplit=3 keeps any ":" that is part of the password intact.
    proxy_split = random.choice(lines).split(":", 3)
    if len(proxy_split) not in (2, 4):
        # Only the host is echoed so credentials never end up in a traceback.
        raise ValueError(
            f"Malformed proxy entry for {proxy_split[0]!r}: "
            "expected ip:port or ip:port:user:pass"
        )

    try:
        port = int(proxy_split[1])
    except ValueError as err:
        raise ValueError(
            f"Proxy port for {proxy_split[0]!r} is not an integer"
        ) from err

    proxy = {"ip": proxy_split[0], "port": port}
    if len(proxy_split) == 4:
        proxy["user"] = proxy_split[2]
        proxy["pass"] = proxy_split[3]
    return proxy


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |   
 2 | Copyright (c) 2021 Nate Wong
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining
 5 | a copy of this software and associated documentation files (the
 6 | "Software"), to deal in the Software without restriction, including
 7 | without limitation the rights to use, copy, modify, merge, publish,
 8 | distribute, sublicense, and/or sell copies of the Software, and to
 9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Image that runs the solver (main.py) with Firefox and geckodriver installed.
FROM python:3.8-slim-buster

WORKDIR /app

# NOTE(review): .env is copied into an image layer, so whatever credentials it
# holds ship with the image; confirm this is intended before publishing it.
COPY .env .env
COPY requirements.txt requirements.txt
COPY proxies.txt proxies.txt
# The contents of ./src land directly in /app, so main.py is /app/main.py.
COPY ./src .
RUN pip3 install -r requirements.txt

# System packages; presumably the shared libraries Firefox needs plus the
# download tools used by the steps below.
RUN apt-get update && apt-get install -y \
    fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
    libnspr4 libnss3 lsb-release xdg-utils libxss1 libdbus-glib-1-2 \
    curl unzip wget \
    xvfb

# Scrape the newest geckodriver version tag from the GitHub "latest" page,
# download that release and unpack the binary into /usr/local/bin.
RUN GECKODRIVER_VERSION=`curl https://github.com/mozilla/geckodriver/releases/latest | grep -Po 'v[0-9]+.[0-9]+.[0-9]+'` && \
    wget https://github.com/mozilla/geckodriver/releases/download/$GECKODRIVER_VERSION/geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz && \
    tar -zxf geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz -C /usr/local/bin && \
    chmod +x /usr/local/bin/geckodriver && \
    rm geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz

# Download the latest linux64 Firefox build, unpack it under /opt and expose
# it on PATH through a symlink.
# NOTE(review): `tar xjf` assumes the download is a bzip2 archive and that
# bzip2 is available in the image; confirm both.
RUN FIREFOX_SETUP=firefox-setup.tar.bz2 && \
    apt-get purge firefox && \
    wget -O $FIREFOX_SETUP "https://download.mozilla.org/?product=firefox-latest&os=linux64" && \
    tar xjf $FIREFOX_SETUP -C /opt/ && \
    ln -s /opt/firefox/firefox /usr/bin/firefox && \
    rm $FIREFOX_SETUP

CMD ["python3", "main.py"]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Recaptcha Fullauto
 2 | I've decided to open source my old Recaptcha v2 solver. My latest version will be open-sourced this summer. I am hoping this project will serve as inspiration for others to build a solver from, as plugging in an existing model is not too difficult to do.<br><br>
 3 | This project uses the AWS [Rekognition](https://aws.amazon.com/rekognition/) API, which is a decent solution but isn't the best. If you are looking to build off this project, I would take a look at [this repo](https://github.com/haze/nocap) for inspiration on building and training your own model. I would also recommend using [this repo](https://github.com/deathlyface/recaptcha-dataset) as a base for your dataset or scraping images from Google Images using [this project](https://pypi.org/project/icrawler/).<br> 
 4 | 
 5 | 
 6 | https://user-images.githubusercontent.com/39974384/115634145-ff9b4900-a2d6-11eb-972c-838389a5c5f6.mp4
 7 | 
 8 | 
 9 | ## Requirements
10 | * Python 3
11 | * Firefox
12 | * Geckodriver (make sure it is installed in your PATH)
13 | * AWS credentials (create a new IAM user with the AmazonRekognitionFullAccess policy attached)
14 | ## Installation
15 | ### Download The Project
16 | ```bash
17 | git clone https://github.com/natewong1313/recaptcha-fullauto.git
18 | ```
19 | ```bash
20 | cd recaptcha-fullauto
21 | ```
22 | ### Configuration
23 | Add your proxies in the proxies.txt file and add your AWS credentials in the .env file<br>
24 | If you don't want to use proxies, modify the src/main.py file as such
25 | ```python
26 | rcs = RecaptchaSolver("https://www.google.com/recaptcha/api2/demo", use_proxies = False)
27 | ```
28 | ### Using Docker
29 | ```bash
30 | docker build -t recaptcha-fullauto .
31 | ```
32 | ```bash
33 | docker run recaptcha-fullauto
34 | ```
35 | ### From Source
36 | ```bash
37 | pip install -r requirements.txt
38 | ```
39 | ```bash
40 | python src/main.py
41 | ```
42 | 


--------------------------------------------------------------------------------
/src/image_handler.py:
--------------------------------------------------------------------------------
 1 | from image_slicer import slice as image_slice
 2 | from dotenv import load_dotenv
 3 | import threading
 4 | import requests
 5 | import boto3
 6 | import glob
 7 | import os
 8 | 
# Load variables from a .env file into the process environment so the
# os.getenv() lookups for the AWS settings in ImageHandler can find them.
load_dotenv()

# Maps the target text of a reCAPTCHA prompt (key) to the label name it is
# compared against in the Rekognition detect_labels response (value).
# Prompts whose target is not listed here cannot be classified.
image_types_conversions = {
    "crosswalks": "Zebra Crossing",
    "a fire hydrant": "Fire Hydrant",
    "cars": "Vehicle",
    "bicycles": "Bicycle",
    "bus": "Bus",
    "chimneys": "Roof",
    "traffic lights": "Traffic Light",
    "parking meters": "Parking Meter",
    "boats": "Boat",
    "motorcycles": "Motorcycle",
    "mountains or hills": "Landscape",
    "tractors": "Tractor",
    "taxis": "Taxi"
}
26 | 
class ImageHandler:
    """Download reCAPTCHA images and classify them with AWS Rekognition.

    Images are written to an ``images`` directory under the current working
    directory.  Each image is classified on its own thread; the indexes of
    the images whose labels match the requested type are collected in
    ``self.results``.
    """

    # Directory (relative to the current working directory) for downloads.
    IMAGES_DIR = "images"
    # Seconds to wait on the image server before giving up on a download.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Prepare an empty image directory and create the Rekognition client.

        AWS settings are read from the ``AWS_ACCESS_KEY_ID``,
        ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION`` environment variables.
        """
        os.makedirs(self.IMAGES_DIR, exist_ok=True)
        self._clear_image_dir()

        self.results = []
        self.aws_rekognition_client = boto3.client(
            "rekognition",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )

    def _clear_image_dir(self):
        """Delete files left in the image directory by a previous run."""
        # Build the pattern with os.path.join so it works on every platform;
        # a hard-coded backslash only matches anything on Windows.
        pattern = os.path.join(os.getcwd(), self.IMAGES_DIR, "*")
        for stale_path in glob.glob(pattern):
            if os.path.isfile(stale_path):
                os.remove(stale_path)

    def _start_worker(self, image_path, desired_image_type, index):
        """Start and return a thread running ``process_image`` for one image."""
        worker = threading.Thread(
            target=self.process_image,
            args=(image_path, desired_image_type, index),
        )
        worker.start()
        return worker

    def process_grid(self, image_grid_url, desired_image_type):
        """Download the 3x3 grid image, slice it and classify every tile.

        Args:
            image_grid_url: URL of the full grid picture.
            desired_image_type: Prompt target; must be a key of
                ``image_types_conversions``.

        Returns:
            list: Indexes (0-8) of the tiles that matched, in the order the
            worker threads finished.
        """
        self.save_image(image_grid_url, "images/captcha_grid.jpg", is_grid=True)

        self.results = []
        workers = []
        for x in range(3):
            for y in range(3):
                # NOTE(review): confirm that the two numbers in the tile file
                # name (as written by image_slicer) map to this index the same
                # way the page orders its tile elements.
                tile_path = f"images/captcha_grid_0{x+1}_0{y+1}.png"
                workers.append(
                    self._start_worker(tile_path, desired_image_type, (x * 3) + y)
                )

        for worker in workers:
            worker.join()

        return self.results

    def process_new_images(self, images_urls, desired_image_type):
        """Download and classify individually replaced tiles.

        Args:
            images_urls: URLs of the replacement images, in page order.
            desired_image_type: Prompt target; must be a key of
                ``image_types_conversions``.

        Returns:
            list: One ``{"image_url": str, "matches": bool}`` dict per input
            URL, in the same order as ``images_urls``.
        """
        self.results = []
        workers = []
        for i, image_url in enumerate(images_urls):
            image_path = f"images/captcha_img{i}.jpg"
            self.save_image(image_url, image_path)
            # Classification starts right away so it overlaps the next download.
            workers.append(self._start_worker(image_path, desired_image_type, i))

        for worker in workers:
            worker.join()

        matched = set(self.results)
        return [
            {"image_url": image_url, "matches": i in matched}
            for i, image_url in enumerate(images_urls)
        ]

    def save_image(self, image_url, path_name, is_grid = False):
        """Stream an image to disk and optionally slice it into nine tiles.

        Args:
            image_url: URL to download.
            path_name: Destination file path.
            is_grid: When true, the saved file is passed to
                ``image_slice(path_name, 9)`` after the download.

        Raises:
            Exception: If the server answers with a status other than 200.
        """
        # The context manager releases the connection even if writing fails.
        with requests.get(image_url, stream=True, timeout=self.REQUEST_TIMEOUT) as r:
            if r.status_code != 200:
                raise Exception(f"Unknown status code from image url: {r.status_code}")
            with open(path_name, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        if is_grid:
            image_slice(path_name, 9)

    def process_image(self, image_path, desired_image_type, index):
        """Classify one image and record ``index`` when it matches.

        Runs on a worker thread.  The image bytes are sent to Rekognition's
        ``detect_labels``; if any returned label name equals the label mapped
        from ``desired_image_type``, ``index`` is appended to ``self.results``.

        Raises:
            KeyError: If ``desired_image_type`` is not a key of
                ``image_types_conversions``.
        """
        # Resolve the label first so an unknown type fails before the API call.
        wanted_label = image_types_conversions[desired_image_type]
        with open(os.path.join(os.getcwd(), image_path), "rb") as image:
            response = self.aws_rekognition_client.detect_labels(
                Image={"Bytes": image.read()}
            )
        if any(label["Name"] == wanted_label for label in response["Labels"]):
            self.results.append(index)


--------------------------------------------------------------------------------
/src/recaptcha_solver.py:
--------------------------------------------------------------------------------
  1 | from selenium import webdriver
  2 | from selenium.common.exceptions import MoveTargetOutOfBoundsException, NoSuchElementException, WebDriverException
  3 | from selenium.webdriver.common.action_chains import ActionChains
  4 | from selenium.webdriver.support import expected_conditions as EC
  5 | from selenium.webdriver.support.ui import WebDriverWait
  6 | from selenium.webdriver.firefox.options import Options
  7 | from selenium.webdriver.common.by import By
  8 | from base64 import b64encode
  9 | from loguru import logger
 10 | import random
 11 | import time
 12 | import os
 13 | from utils import sleep_random, load_proxy
 14 | from recaptcha_task import RecaptchaTask
 15 | from image_handler import ImageHandler, image_types_conversions
 16 | 
 17 | class RecaptchaSolver:
 18 |     def __init__(self, solve_url, use_proxies = True, headless = False):
 19 |         options = Options()
 20 |         options.headless = headless
 21 | 
 22 |         profile = webdriver.FirefoxProfile() 
 23 |         if use_proxies:
 24 |             proxy = load_proxy()
 25 |             profile.set_preference("network.proxy.type", 1)
 26 |             profile.set_preference("network.proxy.http", proxy["ip"])
 27 |             profile.set_preference("network.proxy.http_port", proxy["port"])
 28 |             if "username" in proxy:
 29 |                 credentials = b64encode(f'{proxy["username"]}:{proxy["password"]}'.encode("ascii")).decode()
 30 |                 profile.set_preference("extensions.closeproxyauth.authtoken", credentials)
 31 | 
 32 |         profile.set_preference("dom.webdriver.enabled", False)
 33 |         profile.set_preference("useAutomationExtension", False)
 34 |         profile.update_preferences() 
 35 |         
 36 |         try:
 37 |             self.driver = webdriver.Firefox(firefox_profile = profile, options = options)
 38 |         except WebDriverException:
 39 |             options.headless = True
 40 |             self.driver = webdriver.Firefox(firefox_profile = profile, options = options)
 41 |             
 42 |         self.image_handler = ImageHandler()
 43 |         self.solve_url = solve_url
 44 |         self.recaptcha_task = RecaptchaTask()
 45 |     
 46 |     def solve(self):
 47 |         self.load_captcha_url()
 48 |         self.switch_to_recap_iframe()
 49 |         self.trigger_captcha()
 50 |         self.switch_to_challenge_iframe()
 51 |         while True:
 52 |             self.check_challenge_type()
 53 |             self.find_image_grid()
 54 |             self.solve_image_grid()
 55 |             self.solve_new_images()
 56 |             success = self.verify_challenge()
 57 |             if success:
 58 |                 recaptcha_token = self.get_recaptcha_token()
 59 |                 self.driver.quit()
 60 |                 return recaptcha_token
 61 |         
 62 |     def load_captcha_url(self):
 63 |         logger.debug(f"Load url: {self.solve_url}")
 64 |         self.driver.get(self.solve_url)
 65 |     
 66 |     def switch_to_recap_iframe(self):
 67 |         logger.debug("Searching for recaptcha iframe")
 68 |         recaptcha_iframe = WebDriverWait(self.driver, 25).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'iframe[title="reCAPTCHA"]')))
 69 |         logger.debug("Found iframe, switching to it...")
 70 |         self.driver.switch_to.frame(recaptcha_iframe.get_attribute("name"))
 71 |     
 72 |     def trigger_captcha(self):
 73 |         logger.debug("Searching for recaptcha checkbox")
 74 |         recaptcha_checkbox = WebDriverWait(self.driver, 25).until(EC.presence_of_element_located((By.CLASS_NAME, "recaptcha-checkbox")))
 75 |         logger.debug("Found recaptcha checkbox, delaying before click...")
 76 |         sleep_random(1.0, 3.0)
 77 | 
 78 |         ActionChains(self.driver).move_to_element(recaptcha_checkbox).perform()
 79 |         recaptcha_checkbox.click()
 80 |     
 81 |     def switch_to_challenge_iframe(self):
 82 |         logger.debug("Switching back to parent frame")
 83 |         self.driver.switch_to.parent_frame()
 84 |         logger.debug("Searching for challenge iframe")
 85 |         recaptcha_challenge_iframe = WebDriverWait(self.driver, 25).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'iframe[title="recaptcha challenge"]')))
 86 |         logger.debug("Found challenge iframe, switching to it...")
 87 |         self.driver.switch_to.frame(recaptcha_challenge_iframe.get_attribute("name"))
 88 |     
 89 |     def check_challenge_type(self):
 90 |         while True:
 91 |             class_name = "rc-imageselect-desc-no-canonical"
 92 |             try:
 93 |                 captcha_type = self.driver.find_element_by_class_name("rc-imageselect-desc-no-canonical").get_attribute("textContent")
 94 |             except NoSuchElementException:
 95 |                 captcha_type = self.driver.find_element_by_class_name("rc-imageselect-desc").get_attribute("textContent")
 96 |                 class_name = "rc-imageselect-desc"
 97 | 
 98 |             if "Select all squares with" in captcha_type:
 99 |                 logger.debug("Fetching new challenge...")
100 |                 self.reload_captcha()
101 |                 continue
102 |             elif "Select all images with" in captcha_type:
103 |                 desired_image_type = self.driver.find_element_by_class_name(class_name).find_element_by_tag_name("strong").get_attribute("textContent")
104 |                 if desired_image_type in image_types_conversions:
105 |                     logger.debug(f"Challenge type found: {desired_image_type}")
106 |                     self.recaptcha_task.desired_image_type = desired_image_type
107 |                     return
108 |                 else:
109 |                     logger.error(f"Unknown challenge type found ({desired_image_type}), reloading...")
110 |                     self.reload_captcha()
111 |                     continue
112 |             else:
113 |                 raise Exception("Unknown challenge type")
114 | 
115 |     def find_image_grid(self):
116 |         logger.debug("Searching for image grid")
117 |         image_grid_url = self.driver.find_element_by_class_name("rc-image-tile-wrapper").find_element_by_tag_name("img").get_attribute("src")
118 |         logger.debug(f"Found image grid: {image_grid_url}")
119 |         self.recaptcha_task.image_grid_url = image_grid_url
120 |         
121 |     def solve_image_grid(self):
122 |         while True:
123 |             logger.debug("Processing images in grid")
124 |             results = self.image_handler.process_grid(self.recaptcha_task.image_grid_url, self.recaptcha_task.desired_image_type)
125 |             if len(results) == 0:
126 |                 logger.error("Failed to identify images, reloading")
127 |                 self.reload_captcha()
128 |                 time.sleep(1)
129 |                 continue
130 |             for index in results:
131 |                 self.click_image_grid_elem(index)
132 |             return
133 |     
134 |     def click_image_grid_elem(self, index):
135 |         image_element = self.driver.find_elements_by_class_name("rc-image-tile-target")[index]
136 |         ActionChains(self.driver).move_to_element(image_element).perform()
137 |         image_element.click()
138 |     
139 |     def solve_new_images(self):
140 |         while True:
141 |             logger.debug("Sleeping before checking new images")
142 |             time.sleep(5)
143 |             logger.debug("Processing new images")
144 |             new_images = self.driver.find_elements_by_class_name("rc-image-tile-11")
145 |             new_images_urls = [new_image.get_attribute("src") for new_image in new_images]
146 | 
147 |             results = self.image_handler.process_new_images(new_images_urls, self.recaptcha_task.desired_image_type)
148 |             for i, result in enumerate(results):
149 |                 if result["matches"]: 
150 |                     self.click_new_image_elem(i)
151 | 
152 |             if len([result for result in results if result["matches"]]) == 0 or len([result for result in results if not result["matches"]]) == len(results):
153 |                 logger.debug("All new images solved, proceeding")
154 |                 return
155 |     
156 |     def get_element_index(self, image_url, new_images_urls):
157 |         for i, new_image_url in enumerate(new_images_urls):
158 |             if new_image_url == image_url:
159 |                 return i
160 | 
161 |     def click_new_image_elem(self, index):
162 |         image_element = self.driver.find_elements_by_class_name("rc-image-tile-11")[index].find_element_by_xpath("..")
163 |         ActionChains(self.driver).move_to_element(image_element).perform()
164 |         image_element.click()
165 |     
166 |     def verify_challenge(self):
167 |         logger.debug("Verifying challenge solution")
168 |         self.driver.find_element_by_id("recaptcha-verify-button").click()
169 |         time.sleep(1)
170 |         
171 |         self.driver.switch_to.parent_frame()
172 |         self.switch_to_recap_iframe()
173 |         try:
174 |             self.driver.find_element_by_class_name("recaptcha-checkbox-checked")
175 |             logger.success("Successfully solved challenge")
176 |             return True
177 |         except NoSuchElementException:
178 |             logger.error("Failed to solve challenge, retrying")
179 |             self.switch_to_challenge_iframe()
180 |             if self.driver.find_element_by_class_name("rc-imageselect-incorrect-response").get_attribute("style") != "":
181 |                 self.reload_captcha()
182 |             return False
183 |     
184 |     def get_recaptcha_token(self):
185 |         logger.debug("Searching for recaptcha token")
186 |         self.driver.switch_to.parent_frame()
187 |         recaptcha_token = self.driver.find_element_by_id("g-recaptcha-response").get_attribute("value")
188 |         logger.debug(f"Found recaptcha token: {recaptcha_token}")
189 |         return recaptcha_token
190 | 
191 |     def reload_captcha(self):
192 |         old_val = self.driver.find_element_by_id("recaptcha-token").get_attribute("value")
193 |         self.driver.find_element_by_id("recaptcha-reload-button").click()
194 |         while True:
195 |             if self.driver.find_element_by_id("recaptcha-token").get_attribute("value") != old_val:
196 |                 return
197 |             time.sleep(0.01)


--------------------------------------------------------------------------------