├── .gitignore ├── README.md ├── app ├── __init__.py ├── captcha_resolver.py ├── settings.py ├── solution.py └── utils.py ├── captcha_entire_image.png ├── captcha_resized_image.png ├── captcha_single_image.png ├── main.py ├── requirements.txt └── resized.png /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .env 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RecaptchaResolver 2 | 3 | Recaptcha Resolver 4 | 5 | ## Usage 6 | 7 | Clone this repo: 8 | 9 | ``` 10 | git clone https://github.com/Python3WebSpider/RecaptchaResolver.git 11 | ``` 12 | 13 | Then go to https://yescaptcha.com/i/CnZPBu and register your account, then get a `clientKey` from portal. 14 | 15 | ![image](https://user-images.githubusercontent.com/8678661/170099424-bbe53c64-79b5-46fc-a7c9-95fc88877e3d.png) 16 | 17 | Then create a `.env` file in root of this repo, and write this content: 18 | 19 | ``` 20 | CAPTCHA_RESOLVER_API_KEY= 21 | ``` 22 | 23 | Next, you need to install packages: 24 | 25 | ``` 26 | pip3 install -r requirements.txt 27 | ``` 28 | 29 | At last, run demo: 30 | 31 | ``` 32 | python3 main.py 33 | ``` 34 | 35 | Result: 36 | 37 | ![image](https://user-images.githubusercontent.com/8678661/170100015-f08385e6-7990-4300-831d-b429cd7f9329.png) 38 | 39 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/app/__init__.py -------------------------------------------------------------------------------- /app/captcha_resolver.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | from app.settings import CAPTCHA_RESOLVER_API_KEY, CAPTCHA_RESOLVER_API_URL 3 | import requests 4 | 5 | 6 | class CaptchaResolver(object): 7 | 8 | def __init__(self, api_url=CAPTCHA_RESOLVER_API_URL, api_key=CAPTCHA_RESOLVER_API_KEY): 9 | self.api_url = api_url 10 | self.api_key = api_key 11 | 12 | def create_task(self, image_base64_string, question_id): 13 | logger.debug(f'start to recognize image for question {question_id}') 14 | data = { 15 | "clientKey": self.api_key, 16 | "task": { 17 | "type": "ReCaptchaV2Classification", 18 | "image": image_base64_string, 19 | "question": question_id, 20 | "softID": 78 21 | } 22 | } 23 | try: 24 | response = requests.post(self.api_url, json=data) 25 | result = response.json() 26 | logger.debug(f'captcha recogize result {result}') 27 | return result 28 | except requests.RequestException: 29 | logger.exception( 30 | 'error occurred while recognizing captcha', exc_info=True) 31 | -------------------------------------------------------------------------------- /app/settings.py: -------------------------------------------------------------------------------- 1 | from environs import Env 2 | 3 | env = Env() 4 | env.read_env() 5 | 6 | CAPTCHA_RESOLVER_API_URL = 'https://api.yescaptcha.com/createTask' 7 | CAPTCHA_RESOLVER_API_KEY = env.str('CAPTCHA_RESOLVER_API_KEY') 8 | 9 | CAPTCHA_DEMO_URL = 'https://www.google.com/recaptcha/api2/demo' 10 | 11 | CAPTCHA_ENTIRE_IMAGE_FILE_PATH = 'captcha_entire_image.png' 12 | CAPTCHA_SINGLE_IMAGE_FILE_PATH = 'captcha_single_image.png' 13 | CAPTCHA_RESIZED_IMAGE_FILE_PATH = 'captcha_resized_image.png' 14 | 15 | import json 16 | CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING = { 17 | "taxis": "/m/0pg52", 18 | "bus": "/m/01bjv", 19 | "school bus": "/m/02yvhj", 20 | "motorcycles": "/m/04_sv", 21 | "tractors": "/m/013xlm", 22 | "chimneys": "/m/01jk_4", 23 | "crosswalks": "/m/014xcs", 24 | "traffic lights": "/m/015qff", 25 | "bicycles": "/m/0199g", 26 | "parking meters": "/m/015qbp", 27 | "cars": "/m/0k4j", 28 | "vehicles": "/m/0k4j", 29 | "bridges": "/m/015kr", 30 | "boats": "/m/019jd", 31 | "palm trees": "/m/0cdl1", 32 | "mountains or hills": "/m/09d_r", 33 | "fire hydrant": "/m/01pns0", 34 | "fire hydrants": "/m/01pns0", 35 | "a fire hydrant": "/m/01pns0", 36 | "stairs": "/m/01lynh", 37 | "出租车": "/m/0pg52", 38 | "巴士": "/m/01bjv", 39 | "摩托车": "/m/04_sv", 40 | "机动车": "/m/0k4j", 41 | "小轿车": "/m/0k4j", 42 | "拖拉机": "/m/013xlm", 43 | "烟囱": "/m/01jk_4", 44 | "人行横道": "/m/014xcs", 45 | "红绿灯": "/m/015qff", 46 | "自行车": "/m/0199g", 47 | "停车计价表": "/m/015qbp", 48 | "汽车": "/m/0k4j", 49 | "桥": "/m/015kr", 50 | "船": "/m/019jd", 51 | "棕榈树": "/m/0cdl1", 52 | "山": "/m/09d_r", 53 | "消防栓": "/m/01pns0", 54 | "楼梯": "/m/01lynh", 55 | "交通工具": "/m/0k4j", 56 | "公交车": "/m/01bjv", 57 | "彩色玻璃": "/m/011y23", 58 | "火车站": "/m/0py27", 59 | "消火栓": "/m/01pns0", 60 | "过街人行道": "/m/014xcs", 61 | "车库门": "/m/08l941", 62 | "公交站": "/m/01jw_1", 63 | "停车计时器": "/m/015qbp", 64 | "丘陵": "/m/09d_r", 65 | "车辆": "/m/0k4j", 66 | "公共汽车": "/m/01bjv", 67 | "交通灯": "/m/015qff", 68 | "停车咪表": "/m/015qbp" 69 | } -------------------------------------------------------------------------------- /app/solution.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import requests 3 | from selenium import webdriver 4 | from selenium.webdriver.common.by import By 5 | from selenium.webdriver.support import expected_conditions as EC 6 | from selenium.webdriver.support.wait import WebDriverWait 7 | from selenium.webdriver.remote.webelement import WebElement 8 | import time 9 | from loguru import logger 10 | from app.captcha_resolver import CaptchaResolver 11 | from app.settings import CAPTCHA_ENTIRE_IMAGE_FILE_PATH, CAPTCHA_SINGLE_IMAGE_FILE_PATH 12 | from app.utils import get_question_id_by_target_name, resize_base64_image 13 | 14 | 15 | class Solution(object): 16 | def __init__(self, url): 17 | self.browser = webdriver.Chrome() 18 | self.browser.get(url) 19 | self.wait = WebDriverWait(self.browser, 10) 20 | self.captcha_resolver = CaptchaResolver() 21 | 22 | def __del__(self): 23 | time.sleep(10) 24 | self.browser.close() 25 | 26 | def get_all_frames(self) -> List[WebElement]: 27 | self.browser.switch_to.default_content() 28 | return self.browser.find_elements_by_tag_name('iframe') 29 | 30 | def get_captcha_entry_iframe(self) -> WebElement: 31 | self.browser.switch_to.default_content() 32 | captcha_entry_iframe = self.browser.find_element_by_css_selector( 33 | 'iframe[title="reCAPTCHA"]') 34 | return captcha_entry_iframe 35 | 36 | def switch_to_captcha_entry_iframe(self) -> None: 37 | captcha_entry_iframe: WebElement = self.get_captcha_entry_iframe() 38 | self.browser.switch_to.frame(captcha_entry_iframe) 39 | 40 | def get_captcha_content_iframe(self) -> WebElement: 41 | self.browser.switch_to.default_content() 42 | captcha_content_iframe = self.browser.find_element_by_css_selector( 43 | 'iframe[src*="bframe?"]') 44 | return captcha_content_iframe 45 | 46 | def switch_to_captcha_content_iframe(self) -> None: 47 | captcha_content_iframe: WebElement = self.get_captcha_content_iframe() 48 | self.browser.switch_to.frame(captcha_content_iframe) 49 | 50 | def get_entire_captcha_element(self) -> WebElement: 51 | entire_captcha_element: WebElement = self.wait.until(EC.element_to_be_clickable( 52 | (By.CSS_SELECTOR, '#rc-imageselect-target'))) 53 | return entire_captcha_element 54 | 55 | def get_entire_captcha_natural_width(self) -> Union[int, None]: 56 | result = self.browser.execute_script( 57 | "return document.querySelector('div.rc-image-tile-wrapper > img').naturalWidth") 58 | if result: 59 | return int(result) 60 | return None 61 | 62 | def get_entire_captcha_display_width(self) -> Union[int, None]: 63 | entire_captcha_element = self.get_entire_captcha_element() 64 | if entire_captcha_element: 65 | return entire_captcha_element.rect.get('width') 66 | return None 67 | 68 | def trigger_captcha(self) -> None: 69 | self.switch_to_captcha_entry_iframe() 70 | captcha_entry = self.wait.until(EC.presence_of_element_located( 71 | (By.ID, 'recaptcha-anchor'))) 72 | captcha_entry.click() 73 | time.sleep(2) 74 | self.switch_to_captcha_content_iframe() 75 | entire_captcha_element: WebElement = self.get_entire_captcha_element() 76 | if entire_captcha_element.is_displayed: 77 | logger.debug('trigged captcha successfully') 78 | 79 | def get_captcha_target_name(self) -> WebElement: 80 | captcha_target_name_element: WebElement = self.wait.until(EC.presence_of_element_located( 81 | (By.CSS_SELECTOR, '.rc-imageselect-desc-wrapper strong'))) 82 | return captcha_target_name_element.text 83 | 84 | def get_verify_button(self) -> WebElement: 85 | verify_button = self.wait.until(EC.presence_of_element_located( 86 | (By.CSS_SELECTOR, '#recaptcha-verify-button'))) 87 | return verify_button 88 | 89 | def verify_single_captcha(self, index): 90 | time.sleep(3) 91 | elements = self.wait.until(EC.visibility_of_all_elements_located( 92 | (By.CSS_SELECTOR, '#rc-imageselect-target table td'))) 93 | single_captcha_element: WebElement = elements[index] 94 | class_name = single_captcha_element.get_attribute('class') 95 | logger.debug(f'verifiying single captcha {index}, class {class_name}') 96 | if 'selected' in class_name: 97 | logger.debug(f'no new single captcha displayed') 98 | return 99 | logger.debug('new single captcha displayed') 100 | single_captcha_url = single_captcha_element.find_element_by_css_selector( 101 | 'img').get_attribute('src') 102 | logger.debug(f'single_captcha_url {single_captcha_url}') 103 | with open(CAPTCHA_SINGLE_IMAGE_FILE_PATH, 'wb') as f: 104 | f.write(requests.get(single_captcha_url).content) 105 | resized_single_captcha_base64_string = resize_base64_image( 106 | CAPTCHA_SINGLE_IMAGE_FILE_PATH, (100, 100)) 107 | single_captcha_recognize_result = self.captcha_resolver.create_task( 108 | resized_single_captcha_base64_string, get_question_id_by_target_name(self.captcha_target_name)) 109 | if not single_captcha_recognize_result: 110 | logger.error('count not get single captcha recognize result') 111 | return 112 | has_object = single_captcha_recognize_result.get( 113 | 'solution', {}).get('hasObject') 114 | if has_object is None: 115 | logger.error('count not get captcha recognized indices') 116 | return 117 | if has_object is False: 118 | logger.debug('no more object in this single captcha') 119 | return 120 | if has_object: 121 | single_captcha_element.click() 122 | # check for new single captcha 123 | self.verify_single_captcha(index) 124 | 125 | def get_verify_error_info(self): 126 | self.switch_to_captcha_content_iframe() 127 | self.browser.execute_script( 128 | "return document.querySelector('div.rc-imageselect-incorrect-response')?.text") 129 | 130 | def get_is_successful(self): 131 | self.switch_to_captcha_entry_iframe() 132 | anchor: WebElement = self.wait.until(EC.visibility_of_element_located(( 133 | By.ID, 'recaptcha-anchor' 134 | ))) 135 | checked = anchor.get_attribute('aria-checked') 136 | logger.debug(f'checked {checked}') 137 | return str(checked) == 'true' 138 | 139 | def get_is_failed(self): 140 | return bool(self.get_verify_error_info()) 141 | 142 | def verify_entire_captcha(self): 143 | self.entire_captcha_natural_width = self.get_entire_captcha_natural_width() 144 | logger.debug( 145 | f'entire_captcha_natural_width {self.entire_captcha_natural_width}' 146 | ) 147 | self.captcha_target_name = self.get_captcha_target_name() 148 | logger.debug( 149 | f'captcha_target_name {self.captcha_target_name}' 150 | ) 151 | entire_captcha_element: WebElement = self.get_entire_captcha_element() 152 | entire_captcha_url = entire_captcha_element.find_element_by_css_selector( 153 | 'td img').get_attribute('src') 154 | logger.debug(f'entire_captcha_url {entire_captcha_url}') 155 | with open(CAPTCHA_ENTIRE_IMAGE_FILE_PATH, 'wb') as f: 156 | f.write(requests.get(entire_captcha_url).content) 157 | logger.debug( 158 | f'saved entire captcha to {CAPTCHA_ENTIRE_IMAGE_FILE_PATH}') 159 | resized_entire_captcha_base64_string = resize_base64_image( 160 | CAPTCHA_ENTIRE_IMAGE_FILE_PATH, (self.entire_captcha_natural_width, 161 | self.entire_captcha_natural_width)) 162 | logger.debug( 163 | f'resized_entire_captcha_base64_string, {resized_entire_captcha_base64_string[0:100]}...') 164 | entire_captcha_recognize_result = self.captcha_resolver.create_task( 165 | resized_entire_captcha_base64_string, 166 | get_question_id_by_target_name(self.captcha_target_name) 167 | ) 168 | if not entire_captcha_recognize_result: 169 | logger.error('count not get captcha recognize result') 170 | return 171 | recognized_indices = entire_captcha_recognize_result.get( 172 | 'solution', {}).get('objects') 173 | if not recognized_indices: 174 | logger.error('count not get captcha recognized indices') 175 | return 176 | single_captcha_elements = self.wait.until(EC.visibility_of_all_elements_located( 177 | (By.CSS_SELECTOR, '#rc-imageselect-target table td'))) 178 | for recognized_index in recognized_indices: 179 | single_captcha_element: WebElement = single_captcha_elements[recognized_index] 180 | single_captcha_element.click() 181 | # check if need verify single captcha 182 | self.verify_single_captcha(recognized_index) 183 | 184 | # after all captcha clicked 185 | verify_button: WebElement = self.get_verify_button() 186 | if verify_button.is_displayed: 187 | verify_button.click() 188 | time.sleep(3) 189 | 190 | is_succeed = self.get_is_successful() 191 | if is_succeed: 192 | logger.debug('verifed successfully') 193 | else: 194 | verify_error_info = self.get_verify_error_info() 195 | logger.debug(f'verify_error_info {verify_error_info}') 196 | self.verify_entire_captcha() 197 | 198 | def resolve(self): 199 | self.trigger_captcha() 200 | self.verify_entire_captcha() 201 | -------------------------------------------------------------------------------- /app/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import base64 3 | from loguru import logger 4 | from app.settings import CAPTCHA_RESIZED_IMAGE_FILE_PATH, CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING 5 | 6 | 7 | def resize_base64_image(filename, size): 8 | width, height = size 9 | img = Image.open(filename) 10 | new_img = img.resize((width, height)) 11 | new_img.save(CAPTCHA_RESIZED_IMAGE_FILE_PATH) 12 | with open(CAPTCHA_RESIZED_IMAGE_FILE_PATH, "rb") as f: 13 | data = f.read() 14 | encoded_string = base64.b64encode(data) 15 | return encoded_string.decode('utf-8') 16 | 17 | 18 | def get_question_id_by_target_name(target_name): 19 | logger.debug(f'try to get question id by {target_name}') 20 | question_id = CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING.get(target_name) 21 | logger.debug(f'question_id {question_id}') 22 | return question_id 23 | -------------------------------------------------------------------------------- /captcha_entire_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_entire_image.png -------------------------------------------------------------------------------- /captcha_resized_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_resized_image.png -------------------------------------------------------------------------------- /captcha_single_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_single_image.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | from app.settings import CAPTCHA_DEMO_URL 3 | from app.solution import Solution 4 | 5 | 6 | if __name__ == '__main__': 7 | Solution(CAPTCHA_DEMO_URL).resolve() 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | environs 2 | requests 3 | pillow 4 | selenium 5 | loguru -------------------------------------------------------------------------------- /resized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/resized.png --------------------------------------------------------------------------------