├── .gitignore
├── proxies.txt
├── src
├── recaptcha_task.py
├── main.py
├── utils.py
├── image_handler.py
└── recaptcha_solver.py
├── .env
├── requirements.txt
├── tests
└── test_proxy_formatter.py
├── LICENSE
├── Dockerfile
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
--------------------------------------------------------------------------------
/proxies.txt:
--------------------------------------------------------------------------------
1 | ip:port
2 | ip:port:user:pass
--------------------------------------------------------------------------------
/src/recaptcha_task.py:
--------------------------------------------------------------------------------
class RecaptchaTask:
    """Mutable record of the challenge currently being worked on.

    Both fields start out as empty strings at class level and are overwritten
    on the instance once the corresponding value has been read from the page.
    """

    # URL of the image that backs the challenge grid.
    image_grid_url: str = ""
    # Object description shown in the challenge prompt (e.g. "bicycles").
    desired_image_type: str = ""
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID_HERE
2 | AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY_HERE
3 | AWS_REGION=YOUR_REGION_HERE
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | image-slicer==2.1.1
2 | selenium==3.141.0
3 | loguru==0.5.3
4 | requests==2.22.0
5 | boto3==1.17.44
6 | python-dotenv==0.17.0
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | from recaptcha_solver import RecaptchaSolver
2 | import signal
3 |
4 |
if __name__ == "__main__":
    # Put SIGINT back on the OS default action so Ctrl+C terminates the
    # process immediately instead of raising KeyboardInterrupt.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    # Solve Google's public reCAPTCHA demo page and keep the resulting token.
    solver = RecaptchaSolver("https://www.google.com/recaptcha/api2/demo")
    recaptcha_token = solver.solve()
--------------------------------------------------------------------------------
/tests/test_proxy_formatter.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 |
class ProxyTest(unittest.TestCase):
    """Checks the ``ip:port[:user:pass]`` proxy line format."""

    def test_proxy_formatter(self):
        """A four-field proxy line is split into ip, port, user and pass."""
        raw_proxy = "127.0.0.1:8888:nate:test"
        fields = raw_proxy.split(":")

        # ip and port are always present; credentials only on longer lines.
        formatted_proxy = {"ip": fields[0], "port": int(fields[1])}
        if len(fields) != 2:
            formatted_proxy["user"] = fields[2]
            formatted_proxy["pass"] = fields[3]

        expectations = (
            ("ip", "127.0.0.1"),
            ("port", 8888),
            ("user", "nate"),
            ("pass", "test"),
        )
        for key, expected in expectations:
            self.assertEqual(formatted_proxy[key], expected)
24 |
25 |
# Run the test case through unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | from loguru import logger
2 | import random
3 | import time
4 | import os
5 |
def sleep_random(min_time, max_time):
    """Block for a random duration drawn uniformly between the two bounds.

    Args:
        min_time: One bound of the delay, in seconds.
        max_time: The other bound of the delay, in seconds.
    """
    delay = random.uniform(min_time, max_time)
    message = f"Sleeping for {delay} seconds"
    logger.debug(message)
    time.sleep(delay)
10 |
def load_proxy():
    """Pick a random proxy from ``proxies.txt`` in the current working directory.

    Each non-blank line must look like ``ip:port`` or ``ip:port:user:pass``.
    Blank lines are ignored so a trailing newline in the file cannot be
    selected. Everything after the third colon is treated as the password, so
    passwords may themselves contain ``:``.

    Returns:
        dict: ``{"ip": str, "port": int}``, plus ``"user"`` and ``"pass"``
        string entries when the chosen line carries credentials.

    Raises:
        Exception: If the file holds no proxy entries.
        ValueError: If the chosen line has the wrong number of fields or its
            port is not an integer.
    """
    with open(os.path.join(os.getcwd(), "proxies.txt"), "r") as file:
        lines = [line.strip() for line in file.read().splitlines() if line.strip()]
    if not lines:
        raise Exception("No proxies found in proxies.txt")

    entry = random.choice(lines)
    # maxsplit=3 keeps any further colons inside the password field.
    parts = entry.split(":", 3)
    if len(parts) not in (2, 4):
        raise ValueError(
            f"Malformed proxy entry {entry!r}: expected ip:port or ip:port:user:pass"
        )

    proxy = {"ip": parts[0], "port": int(parts[1])}
    if len(parts) == 4:
        proxy["user"] = parts[2]
        proxy["pass"] = parts[3]
    return proxy
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2021 Nate Wong
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining
5 | a copy of this software and associated documentation files (the
6 | "Software"), to deal in the Software without restriction, including
7 | without limitation the rights to use, copy, modify, merge, publish,
8 | distribute, sublicense, and/or sell copies of the Software, and to
9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Base image: Python 3.8 on the slim Debian Buster variant.
FROM python:3.8-slim-buster

# All following paths are relative to /app inside the image.
WORKDIR /app

# Copy configuration, the dependency list, the proxy list and the contents of
# ./src into /app, then install the Python dependencies.
# NOTE(review): .env carries the AWS credentials, so they end up stored in an
# image layer - confirm this is acceptable or pass them at `docker run` time.
COPY .env .env
COPY requirements.txt requirements.txt
COPY proxies.txt proxies.txt
COPY ./src .
RUN pip3 install -r requirements.txt

# System libraries and download tools, plus xvfb.
# NOTE(review): presumably these are the shared libraries the Firefox binary
# installed below needs at runtime - verify the list against that build.
RUN apt-get update && apt-get install -y \
    fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
    libnspr4 libnss3 lsb-release xdg-utils libxss1 libdbus-glib-1-2 \
    curl unzip wget \
    xvfb

# Read the newest geckodriver version tag (vX.Y.Z) out of the response for the
# GitHub "latest release" URL, download that release's linux64 tarball,
# unpack it into /usr/local/bin, mark it executable and delete the archive.
RUN GECKODRIVER_VERSION=`curl https://github.com/mozilla/geckodriver/releases/latest | grep -Po 'v[0-9]+.[0-9]+.[0-9]+'` && \
    wget https://github.com/mozilla/geckodriver/releases/download/$GECKODRIVER_VERSION/geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz && \
    tar -zxf geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz -C /usr/local/bin && \
    chmod +x /usr/local/bin/geckodriver && \
    rm geckodriver-$GECKODRIVER_VERSION-linux64.tar.gz

# Download the latest linux64 Firefox build from Mozilla, unpack it under
# /opt, expose it as /usr/bin/firefox through a symlink and delete the archive.
# NOTE(review): the archive is extracted with `tar xjf` (bzip2) - confirm the
# download is still a .tar.bz2 and that bzip2 is available in this image.
RUN FIREFOX_SETUP=firefox-setup.tar.bz2 && \
    apt-get purge firefox && \
    wget -O $FIREFOX_SETUP "https://download.mozilla.org/?product=firefox-latest&os=linux64" && \
    tar xjf $FIREFOX_SETUP -C /opt/ && \
    ln -s /opt/firefox/firefox /usr/bin/firefox && \
    rm $FIREFOX_SETUP

# Start the solver; main.py sits directly in /app because ./src was copied there.
CMD ["python3", "main.py"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Recaptcha Fullauto
2 | I've decided to open source my old Recaptcha v2 solver. My latest version will be open-sourced this summer. I am hoping this project will serve as inspiration for others to build a solver from, as plugging in an existing model is not too difficult to do.
3 | This project uses the AWS [Rekognition](https://aws.amazon.com/rekognition/) API, which is a decent solution but isn't the best. If you are looking to build off this project, I would take a look at [this repo](https://github.com/haze/nocap) for inspiration on building and training your own model. I would also recommend using [this repo](https://github.com/deathlyface/recaptcha-dataset) as a base for your dataset or scraping images from Google Images using [this project](https://pypi.org/project/icrawler/).
4 |
5 |
6 | https://user-images.githubusercontent.com/39974384/115634145-ff9b4900-a2d6-11eb-972c-838389a5c5f6.mp4
7 |
8 |
9 | ## Requirements
10 | * Python 3
11 | * Firefox
12 | * Geckodriver (make sure it is installed in your PATH)
13 | * AWS credentials (create a new IAM user with the AmazonRekognitionFullAccess policy attached)
14 | ## Installation
15 | ### Download The Project
16 | ```bash
17 | git clone https://github.com/natewong1313/recaptcha-fullauto.git
18 | ```
19 | ```bash
20 | cd recaptcha-fullauto
21 | ```
22 | ### Configuration
23 | Add your proxies to the proxies.txt file and your AWS credentials to the .env file.
24 | If you don't want to use proxies, modify the src/main.py file as follows:
25 | ```python
26 | rcs = RecaptchaSolver("https://www.google.com/recaptcha/api2/demo", use_proxies = False)
27 | ```
28 | ### Using Docker
29 | ```bash
30 | docker build -t recaptcha-fullauto .
31 | ```
32 | ```bash
33 | docker run recaptcha-fullauto
34 | ```
35 | ### From Source
36 | ```bash
37 | pip install -r requirements.txt
38 | ```
39 | ```bash
40 | python src/main.py
41 | ```
42 |
--------------------------------------------------------------------------------
/src/image_handler.py:
--------------------------------------------------------------------------------
1 | from image_slicer import slice as image_slice
2 | from dotenv import load_dotenv
3 | import threading
4 | import requests
5 | import boto3
6 | import glob
7 | import os
8 |
# Load settings from a .env file into the process environment (python-dotenv);
# ImageHandler.__init__ reads the AWS_* variables back with os.getenv.
load_dotenv()

# Maps the object description taken from the challenge prompt to the label
# name it is compared against in ImageHandler.process_image. Prompts that are
# missing from this table are treated as unsupported by the solver.
image_types_conversions = {
    "crosswalks": "Zebra Crossing",
    "a fire hydrant": "Fire Hydrant",
    "cars": "Vehicle",
    "bicycles": "Bicycle",
    "bus": "Bus",
    "chimneys": "Roof",
    "traffic lights": "Traffic Light",
    "parking meters": "Parking Meter",
    "boats": "Boat",
    "motorcycles": "Motorcycle",
    "mountains or hills": "Landscape",
    "tractors": "Tractor",
    "taxis": "Taxi"
}
26 |
class ImageHandler:
    """Downloads captcha images and checks them against a wanted label.

    Images are written to an ``images`` directory relative to the current
    working directory and classified with the AWS Rekognition
    ``detect_labels`` call. Indexes of matching images are collected in
    ``self.results``.
    """

    def __init__(self):
        """Prepare the ``images`` directory and the Rekognition client.

        Credentials and region come from the ``AWS_ACCESS_KEY_ID``,
        ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION`` environment variables.
        Files left in ``images`` by an earlier run are deleted.
        """
        os.makedirs("images", exist_ok=True)

        self.aws_rekognition_client = boto3.client(
            "rekognition",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )
        self.results = []
        # Build the path with os.path.join: the previous hard-coded "images\\*"
        # pattern only matched on Windows, so nothing was cleaned elsewhere.
        self._clear_directory(os.path.join(os.getcwd(), "images"))

    @staticmethod
    def _clear_directory(directory):
        """Delete every regular file directly inside *directory*."""
        pattern = os.path.join(glob.escape(directory), "*")
        for path in glob.glob(pattern):
            if os.path.isfile(path):
                os.remove(path)

    def process_grid(self, image_grid_url, desired_image_type):
        """Download the 3x3 grid image, slice it and classify every tile.

        Args:
            image_grid_url: URL of the full grid image.
            desired_image_type: Key of ``image_types_conversions`` to look for.

        Returns:
            list: Tile indexes (0-8) whose labels matched. Tiles are
            classified on parallel threads, so the order is not fixed.
        """
        self.save_image(image_grid_url, "images/captcha_grid.jpg", is_grid=True)

        image_worker_threads = []
        self.results = []
        for x in range(3):
            for y in range(3):
                index = (x * 3) + y
                tile_path = f"images/captcha_grid_0{x+1}_0{y+1}.png"
                t = threading.Thread(
                    target=self.process_image,
                    args=(tile_path, desired_image_type, index),
                )
                t.start()
                image_worker_threads.append(t)

        for t in image_worker_threads:
            t.join()

        return self.results

    def process_new_images(self, images_urls, desired_image_type):
        """Download and classify individually supplied images.

        Args:
            images_urls: URLs of the images to check.
            desired_image_type: Key of ``image_types_conversions`` to look for.

        Returns:
            list: One ``{"image_url": ..., "matches": bool}`` dict per input
            URL, in input order.
        """
        image_worker_threads = []
        self.results = []
        for i, image_url in enumerate(images_urls):
            image_path = f"images/captcha_img{i}.jpg"
            self.save_image(image_url, image_path)
            t = threading.Thread(
                target=self.process_image,
                args=(image_path, desired_image_type, i),
            )
            t.start()
            image_worker_threads.append(t)

        for t in image_worker_threads:
            t.join()

        matched = set(self.results)
        return [
            {"image_url": image_url, "matches": i in matched}
            for i, image_url in enumerate(images_urls)
        ]

    def save_image(self, image_url, path_name, is_grid=False):
        """Download *image_url* to *path_name*, slicing it when it is a grid.

        Args:
            image_url: URL to fetch.
            path_name: Destination file path.
            is_grid: When true, the saved file is split into 9 tiles with
                ``image_slicer.slice``.

        Raises:
            Exception: If the server answers with a status other than 200.
        """
        # The context manager releases the streamed connection in every case.
        with requests.get(image_url, stream=True) as r:
            if r.status_code != 200:
                raise Exception(f"Unknown status code from image url: {r.status_code}")
            with open(path_name, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Slice only after the file has been closed so all bytes are on disk.
        if is_grid:
            image_slice(path_name, 9)

    def process_image(self, image_path, desired_image_type, index):
        """Record *index* in ``self.results`` if the image carries the label.

        Args:
            image_path: Image path relative to the current working directory.
            desired_image_type: Key of ``image_types_conversions`` to look for.
            index: Value appended to ``self.results`` on a match.
        """
        wanted_label = image_types_conversions[desired_image_type]
        with open(os.path.join(os.getcwd(), image_path), "rb") as image:
            response = self.aws_rekognition_client.detect_labels(Image={"Bytes": image.read()})
        for label in response["Labels"]:
            if label["Name"] == wanted_label:
                self.results.append(index)
                # One hit is enough; stopping here keeps the index from being
                # recorded twice should the label ever be listed again.
                break
--------------------------------------------------------------------------------
/src/recaptcha_solver.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.common.exceptions import MoveTargetOutOfBoundsException, NoSuchElementException, WebDriverException
3 | from selenium.webdriver.common.action_chains import ActionChains
4 | from selenium.webdriver.support import expected_conditions as EC
5 | from selenium.webdriver.support.ui import WebDriverWait
6 | from selenium.webdriver.firefox.options import Options
7 | from selenium.webdriver.common.by import By
8 | from base64 import b64encode
9 | from loguru import logger
10 | import random
11 | import time
12 | import os
13 | from utils import sleep_random, load_proxy
14 | from recaptcha_task import RecaptchaTask
15 | from image_handler import ImageHandler, image_types_conversions
16 |
class RecaptchaSolver:
    """Drives a Firefox session through a reCAPTCHA v2 image challenge.

    The solver opens ``solve_url``, ticks the reCAPTCHA checkbox, classifies
    the challenge images through ``ImageHandler`` and clicks the matching
    tiles until the checkbox is reported as checked.
    """

    def __init__(self, solve_url, use_proxies=True, headless=False):
        """Start Firefox and prepare the helpers used while solving.

        Args:
            solve_url: Page that hosts the reCAPTCHA widget.
            use_proxies: When true, a proxy picked by ``load_proxy`` is
                written into the Firefox profile.
            headless: Whether Firefox is started without a window.
        """
        options = Options()
        options.headless = headless

        profile = webdriver.FirefoxProfile()
        if use_proxies:
            for name, value in self._build_proxy_preferences(load_proxy()).items():
                profile.set_preference(name, value)

        # NOTE(review): presumably set to make the automated browser harder to
        # detect - confirm both preferences are still honoured by Firefox.
        profile.set_preference("dom.webdriver.enabled", False)
        profile.set_preference("useAutomationExtension", False)
        profile.update_preferences()

        try:
            self.driver = webdriver.Firefox(firefox_profile=profile, options=options)
        except WebDriverException:
            # Second attempt in headless mode (e.g. no display available).
            options.headless = True
            self.driver = webdriver.Firefox(firefox_profile=profile, options=options)

        self.image_handler = ImageHandler()
        self.solve_url = solve_url
        self.recaptcha_task = RecaptchaTask()

    @staticmethod
    def _build_proxy_preferences(proxy):
        """Translate a proxy dict into Firefox profile preferences.

        Args:
            proxy: Dict with ``"ip"`` and ``"port"`` and optionally
                ``"user"``/``"pass"`` (the keys ``load_proxy`` produces);
                ``"username"``/``"password"`` are accepted as well.

        Returns:
            dict: Preference name to value.
        """
        prefs = {
            "network.proxy.type": 1,
            "network.proxy.http": proxy["ip"],
            "network.proxy.http_port": proxy["port"],
            # HTTPS pages use the separate "ssl" proxy preferences.
            "network.proxy.ssl": proxy["ip"],
            "network.proxy.ssl_port": proxy["port"],
        }
        user = proxy.get("user", proxy.get("username"))
        if user is not None:
            password = proxy.get("pass", proxy.get("password", ""))
            credentials = b64encode(f"{user}:{password}".encode("ascii")).decode()
            # NOTE(review): this preference name looks like it belongs to a
            # proxy-auth add-on - confirm that add-on is present in the profile.
            prefs["extensions.closeproxyauth.authtoken"] = credentials
        return prefs

    def solve(self):
        """Solve the challenge and return the reCAPTCHA response token.

        The browser is closed when this method finishes, whether it returns
        or raises, so it can only be called once per instance.
        """
        try:
            self.load_captcha_url()
            self.switch_to_recap_iframe()
            self.trigger_captcha()
            self.switch_to_challenge_iframe()
            while True:
                self.check_challenge_type()
                self.find_image_grid()
                self.solve_image_grid()
                self.solve_new_images()
                if self.verify_challenge():
                    return self.get_recaptcha_token()
        finally:
            # Always shut the browser down so a failure does not leave a
            # Firefox process behind.
            self.driver.quit()

    def load_captcha_url(self):
        """Navigate the browser to ``solve_url``."""
        logger.debug(f"Load url: {self.solve_url}")
        self.driver.get(self.solve_url)

    def switch_to_recap_iframe(self):
        """Wait up to 25 s for the reCAPTCHA checkbox iframe and enter it."""
        logger.debug("Searching for recaptcha iframe")
        recaptcha_iframe = WebDriverWait(self.driver, 25).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'iframe[title="reCAPTCHA"]'))
        )
        logger.debug("Found iframe, switching to it...")
        self.driver.switch_to.frame(recaptcha_iframe.get_attribute("name"))

    def trigger_captcha(self):
        """Click the reCAPTCHA checkbox after a random 1-3 s pause."""
        logger.debug("Searching for recaptcha checkbox")
        recaptcha_checkbox = WebDriverWait(self.driver, 25).until(
            EC.presence_of_element_located((By.CLASS_NAME, "recaptcha-checkbox"))
        )
        logger.debug("Found recaptcha checkbox, delaying before click...")
        sleep_random(1.0, 3.0)

        ActionChains(self.driver).move_to_element(recaptcha_checkbox).perform()
        recaptcha_checkbox.click()

    def switch_to_challenge_iframe(self):
        """Go back to the parent frame and enter the challenge iframe."""
        logger.debug("Switching back to parent frame")
        self.driver.switch_to.parent_frame()
        logger.debug("Searching for challenge iframe")
        recaptcha_challenge_iframe = WebDriverWait(self.driver, 25).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, 'iframe[title="recaptcha challenge"]'))
        )
        logger.debug("Found challenge iframe, switching to it...")
        self.driver.switch_to.frame(recaptcha_challenge_iframe.get_attribute("name"))

    def check_challenge_type(self):
        """Reload until a supported "Select all images with" challenge shows.

        On return ``recaptcha_task.desired_image_type`` holds the prompt's
        target text, which is a key of ``image_types_conversions``.

        Raises:
            Exception: If the prompt is neither a "squares" nor an "images"
                challenge.
        """
        while True:
            # The prompt lives in one of two elements; try the first and fall
            # back to the second.
            class_name = "rc-imageselect-desc-no-canonical"
            try:
                captcha_type = self.driver.find_element_by_class_name(class_name).get_attribute("textContent")
            except NoSuchElementException:
                class_name = "rc-imageselect-desc"
                captcha_type = self.driver.find_element_by_class_name(class_name).get_attribute("textContent")

            if "Select all squares with" in captcha_type:
                logger.debug("Fetching new challenge...")
                self.reload_captcha()
                continue

            if "Select all images with" not in captcha_type:
                raise Exception("Unknown challenge type")

            desired_image_type = (
                self.driver.find_element_by_class_name(class_name)
                .find_element_by_tag_name("strong")
                .get_attribute("textContent")
            )
            if desired_image_type in image_types_conversions:
                logger.debug(f"Challenge type found: {desired_image_type}")
                self.recaptcha_task.desired_image_type = desired_image_type
                return

            logger.error(f"Unknown challenge type found ({desired_image_type}), reloading...")
            self.reload_captcha()

    def find_image_grid(self):
        """Store the grid image URL in ``recaptcha_task.image_grid_url``."""
        logger.debug("Searching for image grid")
        image_grid_url = (
            self.driver.find_element_by_class_name("rc-image-tile-wrapper")
            .find_element_by_tag_name("img")
            .get_attribute("src")
        )
        logger.debug(f"Found image grid: {image_grid_url}")
        self.recaptcha_task.image_grid_url = image_grid_url

    def solve_image_grid(self):
        """Classify the grid tiles and click every match.

        When no tile matches, the challenge is reloaded and the prompt and
        grid URL are read again before the next attempt.
        """
        while True:
            logger.debug("Processing images in grid")
            results = self.image_handler.process_grid(
                self.recaptcha_task.image_grid_url, self.recaptcha_task.desired_image_type
            )
            if results:
                break
            logger.error("Failed to identify images, reloading")
            self.reload_captcha()
            time.sleep(1)
            # Refresh the stored prompt and grid URL; retrying with the old
            # URL would classify the same image again and never make progress.
            self.check_challenge_type()
            self.find_image_grid()

        # Click each tile once: a second click on the same tile would undo
        # the selection.
        for index in dict.fromkeys(results):
            self.click_image_grid_elem(index)

    def click_image_grid_elem(self, index):
        """Move to and click the grid tile at position *index*."""
        image_element = self.driver.find_elements_by_class_name("rc-image-tile-target")[index]
        ActionChains(self.driver).move_to_element(image_element).perform()
        image_element.click()

    def solve_new_images(self):
        """Keep classifying ``rc-image-tile-11`` images until none match.

        Each round waits 5 s, classifies the current images of that class and
        clicks the ones that match.
        """
        while True:
            logger.debug("Sleeping before checking new images")
            time.sleep(5)
            logger.debug("Processing new images")
            new_images = self.driver.find_elements_by_class_name("rc-image-tile-11")
            new_images_urls = [new_image.get_attribute("src") for new_image in new_images]

            results = self.image_handler.process_new_images(
                new_images_urls, self.recaptcha_task.desired_image_type
            )
            for i, result in enumerate(results):
                if result["matches"]:
                    self.click_new_image_elem(i)

            if not any(result["matches"] for result in results):
                logger.debug("All new images solved, proceeding")
                return

    def get_element_index(self, image_url, new_images_urls):
        """Return the first position of *image_url* in *new_images_urls*.

        Returns ``None`` when the URL is not in the list.
        """
        for i, new_image_url in enumerate(new_images_urls):
            if new_image_url == image_url:
                return i

    def click_new_image_elem(self, index):
        """Click the parent of the ``rc-image-tile-11`` image at *index*."""
        image_element = self.driver.find_elements_by_class_name("rc-image-tile-11")[index].find_element_by_xpath("..")
        ActionChains(self.driver).move_to_element(image_element).perform()
        image_element.click()

    def verify_challenge(self):
        """Press the verify button and report whether the checkbox is checked.

        Returns:
            bool: ``True`` when the checkbox carries the "checked" class.
            Otherwise ``False``, with the driver back inside the challenge
            iframe; the challenge is reloaded when the incorrect-response
            element has a non-empty ``style`` attribute.
        """
        logger.debug("Verifying challenge solution")
        self.driver.find_element_by_id("recaptcha-verify-button").click()
        time.sleep(1)

        self.driver.switch_to.parent_frame()
        self.switch_to_recap_iframe()
        try:
            self.driver.find_element_by_class_name("recaptcha-checkbox-checked")
        except NoSuchElementException:
            logger.error("Failed to solve challenge, retrying")
            self.switch_to_challenge_iframe()
            incorrect_style = self.driver.find_element_by_class_name(
                "rc-imageselect-incorrect-response"
            ).get_attribute("style")
            if incorrect_style != "":
                self.reload_captcha()
            return False

        logger.success("Successfully solved challenge")
        return True

    def get_recaptcha_token(self):
        """Read the token from the ``g-recaptcha-response`` element of the parent frame."""
        logger.debug("Searching for recaptcha token")
        self.driver.switch_to.parent_frame()
        recaptcha_token = self.driver.find_element_by_id("g-recaptcha-response").get_attribute("value")
        logger.debug(f"Found recaptcha token: {recaptcha_token}")
        return recaptcha_token

    def reload_captcha(self):
        """Click the reload button and wait for the challenge token to change.

        The ``recaptcha-token`` value is polled every 10 ms; there is no
        upper bound on the wait.
        """
        old_val = self.driver.find_element_by_id("recaptcha-token").get_attribute("value")
        self.driver.find_element_by_id("recaptcha-reload-button").click()
        while self.driver.find_element_by_id("recaptcha-token").get_attribute("value") == old_val:
            time.sleep(0.01)
--------------------------------------------------------------------------------