├── .gitignore
├── README.md
├── app
    ├── __init__.py
    ├── captcha_resolver.py
    ├── settings.py
    ├── solution.py
    └── utils.py
├── captcha_entire_image.png
├── captcha_resized_image.png
├── captcha_single_image.png
├── main.py
├── requirements.txt
└── resized.png


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .env
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RecaptchaResolver
 2 | 
 3 | Recaptcha Resolver
 4 | 
 5 | ## Usage
 6 | 
 7 | Clone this repo:
 8 | 
 9 | ```
10 | git clone https://github.com/Python3WebSpider/RecaptchaResolver.git
11 | ```
12 | 
13 | Then register an account at https://yescaptcha.com/i/CnZPBu and get a `clientKey` from the portal.
14 | 
15 | ![image](https://user-images.githubusercontent.com/8678661/170099424-bbe53c64-79b5-46fc-a7c9-95fc88877e3d.png)
16 | 
17 | Then create a `.env` file in the root of this repo with the following content:
18 | 
19 | ```
20 | CAPTCHA_RESOLVER_API_KEY=<Your Client Key>
21 | ```
22 | 
23 | Next, install the required packages:
24 | 
25 | ```
26 | pip3 install -r requirements.txt
27 | ```
28 | 
29 | Finally, run the demo:
30 | 
31 | ```
32 | python3 main.py
33 | ```
34 | 
35 | Result:
36 | 
37 | ![image](https://user-images.githubusercontent.com/8678661/170100015-f08385e6-7990-4300-831d-b429cd7f9329.png)
38 | 
39 | 


--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/app/__init__.py


--------------------------------------------------------------------------------
/app/captcha_resolver.py:
--------------------------------------------------------------------------------
 1 | from loguru import logger
 2 | from app.settings import CAPTCHA_RESOLVER_API_KEY, CAPTCHA_RESOLVER_API_URL
 3 | import requests
 4 | 
 5 | 
 6 | class CaptchaResolver(object):
 7 | 
 8 |     def __init__(self, api_url=CAPTCHA_RESOLVER_API_URL, api_key=CAPTCHA_RESOLVER_API_KEY):
 9 |         self.api_url = api_url
10 |         self.api_key = api_key
11 | 
12 |     def create_task(self, image_base64_string, question_id):
13 |         logger.debug(f'start to recognize image for question {question_id}')
14 |         data = {
15 |             "clientKey": self.api_key,
16 |             "task": {
17 |                 "type": "ReCaptchaV2Classification",
18 |                 "image": image_base64_string,
19 |                 "question": question_id,
20 |                 "softID": 78
21 |             }
22 |         }
23 |         try:
24 |             response = requests.post(self.api_url, json=data)
25 |             result = response.json()
26 |             logger.debug(f'captcha recogize result {result}')
27 |             return result
28 |         except requests.RequestException:
29 |             logger.exception(
30 |                 'error occurred while recognizing captcha', exc_info=True)
31 | 


--------------------------------------------------------------------------------
/app/settings.py:
--------------------------------------------------------------------------------
 1 | from environs import Env
 2 | 
 3 | env = Env()
 4 | env.read_env()
 5 | 
 6 | CAPTCHA_RESOLVER_API_URL = 'https://api.yescaptcha.com/createTask'
 7 | CAPTCHA_RESOLVER_API_KEY = env.str('CAPTCHA_RESOLVER_API_KEY')
 8 | 
 9 | CAPTCHA_DEMO_URL = 'https://www.google.com/recaptcha/api2/demo'
10 | 
11 | CAPTCHA_ENTIRE_IMAGE_FILE_PATH = 'captcha_entire_image.png'
12 | CAPTCHA_SINGLE_IMAGE_FILE_PATH = 'captcha_single_image.png'
13 | CAPTCHA_RESIZED_IMAGE_FILE_PATH = 'captcha_resized_image.png'
14 | 
15 | import json
16 | CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING = {
17 |     "taxis": "/m/0pg52",
18 |     "bus": "/m/01bjv",
19 |     "school bus": "/m/02yvhj",
20 |     "motorcycles": "/m/04_sv",
21 |     "tractors": "/m/013xlm",
22 |     "chimneys": "/m/01jk_4",
23 |     "crosswalks": "/m/014xcs",
24 |     "traffic lights": "/m/015qff",
25 |     "bicycles": "/m/0199g",
26 |     "parking meters": "/m/015qbp",
27 |     "cars": "/m/0k4j",
28 |     "vehicles": "/m/0k4j",
29 |     "bridges": "/m/015kr",
30 |     "boats": "/m/019jd",
31 |     "palm trees": "/m/0cdl1",
32 |     "mountains or hills": "/m/09d_r",
33 |     "fire hydrant": "/m/01pns0",
34 |     "fire hydrants": "/m/01pns0",
35 |     "a fire hydrant": "/m/01pns0",
36 |     "stairs": "/m/01lynh",
37 |     "出租车": "/m/0pg52",
38 |     "巴士": "/m/01bjv",
39 |     "摩托车": "/m/04_sv",
40 |     "机动车": "/m/0k4j",
41 |     "小轿车": "/m/0k4j",
42 |     "拖拉机": "/m/013xlm",
43 |     "烟囱": "/m/01jk_4",
44 |     "人行横道": "/m/014xcs",
45 |     "红绿灯": "/m/015qff",
46 |     "自行车": "/m/0199g",
47 |     "停车计价表": "/m/015qbp",
48 |     "汽车": "/m/0k4j",
49 |     "桥": "/m/015kr",
50 |     "船": "/m/019jd",
51 |     "棕榈树": "/m/0cdl1",
52 |     "山": "/m/09d_r",
53 |     "消防栓": "/m/01pns0",
54 |     "楼梯": "/m/01lynh",
55 |     "交通工具": "/m/0k4j",
56 |     "公交车": "/m/01bjv",
57 |     "彩色玻璃": "/m/011y23",
58 |     "火车站": "/m/0py27",
59 |     "消火栓": "/m/01pns0",
60 |     "过街人行道": "/m/014xcs",
61 |     "车库门": "/m/08l941",
62 |     "公交站": "/m/01jw_1",
63 |     "停车计时器": "/m/015qbp",
64 |     "丘陵": "/m/09d_r",
65 |     "车辆": "/m/0k4j",
66 |     "公共汽车": "/m/01bjv",
67 |     "交通灯": "/m/015qff",
68 |     "停车咪表": "/m/015qbp"
69 | }


--------------------------------------------------------------------------------
/app/solution.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Union
  2 | import requests
  3 | from selenium import webdriver
  4 | from selenium.webdriver.common.by import By
  5 | from selenium.webdriver.support import expected_conditions as EC
  6 | from selenium.webdriver.support.wait import WebDriverWait
  7 | from selenium.webdriver.remote.webelement import WebElement
  8 | import time
  9 | from loguru import logger
 10 | from app.captcha_resolver import CaptchaResolver
 11 | from app.settings import CAPTCHA_ENTIRE_IMAGE_FILE_PATH, CAPTCHA_SINGLE_IMAGE_FILE_PATH
 12 | from app.utils import get_question_id_by_target_name, resize_base64_image
 13 | 
 14 | 
 15 | class Solution(object):
 16 |     def __init__(self, url):
 17 |         self.browser = webdriver.Chrome()
 18 |         self.browser.get(url)
 19 |         self.wait = WebDriverWait(self.browser, 10)
 20 |         self.captcha_resolver = CaptchaResolver()
 21 | 
 22 |     def __del__(self):
 23 |         time.sleep(10)
 24 |         self.browser.close()
 25 | 
 26 |     def get_all_frames(self) -> List[WebElement]:
 27 |         self.browser.switch_to.default_content()
 28 |         return self.browser.find_elements_by_tag_name('iframe')
 29 | 
 30 |     def get_captcha_entry_iframe(self) -> WebElement:
 31 |         self.browser.switch_to.default_content()
 32 |         captcha_entry_iframe = self.browser.find_element_by_css_selector(
 33 |             'iframe[title="reCAPTCHA"]')
 34 |         return captcha_entry_iframe
 35 | 
 36 |     def switch_to_captcha_entry_iframe(self) -> None:
 37 |         captcha_entry_iframe: WebElement = self.get_captcha_entry_iframe()
 38 |         self.browser.switch_to.frame(captcha_entry_iframe)
 39 | 
 40 |     def get_captcha_content_iframe(self) -> WebElement:
 41 |         self.browser.switch_to.default_content()
 42 |         captcha_content_iframe = self.browser.find_element_by_css_selector(
 43 |             'iframe[src*="bframe?"]')
 44 |         return captcha_content_iframe
 45 | 
 46 |     def switch_to_captcha_content_iframe(self) -> None:
 47 |         captcha_content_iframe: WebElement = self.get_captcha_content_iframe()
 48 |         self.browser.switch_to.frame(captcha_content_iframe)
 49 | 
 50 |     def get_entire_captcha_element(self) -> WebElement:
 51 |         entire_captcha_element: WebElement = self.wait.until(EC.element_to_be_clickable(
 52 |             (By.CSS_SELECTOR, '#rc-imageselect-target')))
 53 |         return entire_captcha_element
 54 | 
 55 |     def get_entire_captcha_natural_width(self) -> Union[int, None]:
 56 |         result = self.browser.execute_script(
 57 |             "return document.querySelector('div.rc-image-tile-wrapper > img').naturalWidth")
 58 |         if result:
 59 |             return int(result)
 60 |         return None
 61 | 
 62 |     def get_entire_captcha_display_width(self) -> Union[int, None]:
 63 |         entire_captcha_element = self.get_entire_captcha_element()
 64 |         if entire_captcha_element:
 65 |             return entire_captcha_element.rect.get('width')
 66 |         return None
 67 | 
 68 |     def trigger_captcha(self) -> None:
 69 |         self.switch_to_captcha_entry_iframe()
 70 |         captcha_entry = self.wait.until(EC.presence_of_element_located(
 71 |             (By.ID, 'recaptcha-anchor')))
 72 |         captcha_entry.click()
 73 |         time.sleep(2)
 74 |         self.switch_to_captcha_content_iframe()
 75 |         entire_captcha_element: WebElement = self.get_entire_captcha_element()
 76 |         if entire_captcha_element.is_displayed:
 77 |             logger.debug('trigged captcha successfully')
 78 | 
 79 |     def get_captcha_target_name(self) -> WebElement:
 80 |         captcha_target_name_element: WebElement = self.wait.until(EC.presence_of_element_located(
 81 |             (By.CSS_SELECTOR, '.rc-imageselect-desc-wrapper strong')))
 82 |         return captcha_target_name_element.text
 83 | 
 84 |     def get_verify_button(self) -> WebElement:
 85 |         verify_button = self.wait.until(EC.presence_of_element_located(
 86 |             (By.CSS_SELECTOR, '#recaptcha-verify-button')))
 87 |         return verify_button
 88 | 
 89 |     def verify_single_captcha(self, index):
 90 |         time.sleep(3)
 91 |         elements = self.wait.until(EC.visibility_of_all_elements_located(
 92 |             (By.CSS_SELECTOR, '#rc-imageselect-target table td')))
 93 |         single_captcha_element: WebElement = elements[index]
 94 |         class_name = single_captcha_element.get_attribute('class')
 95 |         logger.debug(f'verifiying single captcha {index}, class {class_name}')
 96 |         if 'selected' in class_name:
 97 |             logger.debug(f'no new single captcha displayed')
 98 |             return
 99 |         logger.debug('new single captcha displayed')
100 |         single_captcha_url = single_captcha_element.find_element_by_css_selector(
101 |             'img').get_attribute('src')
102 |         logger.debug(f'single_captcha_url {single_captcha_url}')
103 |         with open(CAPTCHA_SINGLE_IMAGE_FILE_PATH, 'wb') as f:
104 |             f.write(requests.get(single_captcha_url).content)
105 |         resized_single_captcha_base64_string = resize_base64_image(
106 |             CAPTCHA_SINGLE_IMAGE_FILE_PATH, (100, 100))
107 |         single_captcha_recognize_result = self.captcha_resolver.create_task(
108 |             resized_single_captcha_base64_string, get_question_id_by_target_name(self.captcha_target_name))
109 |         if not single_captcha_recognize_result:
110 |             logger.error('count not get single captcha recognize result')
111 |             return
112 |         has_object = single_captcha_recognize_result.get(
113 |             'solution', {}).get('hasObject')
114 |         if has_object is None:
115 |             logger.error('count not get captcha recognized indices')
116 |             return
117 |         if has_object is False:
118 |             logger.debug('no more object in this single captcha')
119 |             return
120 |         if has_object:
121 |             single_captcha_element.click()
122 |             # check for new single captcha
123 |             self.verify_single_captcha(index)
124 | 
125 |     def get_verify_error_info(self):
126 |         self.switch_to_captcha_content_iframe()
127 |         self.browser.execute_script(
128 |             "return document.querySelector('div.rc-imageselect-incorrect-response')?.text")
129 | 
130 |     def get_is_successful(self):
131 |         self.switch_to_captcha_entry_iframe()
132 |         anchor: WebElement = self.wait.until(EC.visibility_of_element_located((
133 |             By.ID, 'recaptcha-anchor'
134 |         )))
135 |         checked = anchor.get_attribute('aria-checked')
136 |         logger.debug(f'checked {checked}')
137 |         return str(checked) == 'true'
138 | 
139 |     def get_is_failed(self):
140 |         return bool(self.get_verify_error_info())
141 | 
142 |     def verify_entire_captcha(self):
143 |         self.entire_captcha_natural_width = self.get_entire_captcha_natural_width()
144 |         logger.debug(
145 |             f'entire_captcha_natural_width {self.entire_captcha_natural_width}'
146 |         )
147 |         self.captcha_target_name = self.get_captcha_target_name()
148 |         logger.debug(
149 |             f'captcha_target_name {self.captcha_target_name}'
150 |         )
151 |         entire_captcha_element: WebElement = self.get_entire_captcha_element()
152 |         entire_captcha_url = entire_captcha_element.find_element_by_css_selector(
153 |             'td img').get_attribute('src')
154 |         logger.debug(f'entire_captcha_url {entire_captcha_url}')
155 |         with open(CAPTCHA_ENTIRE_IMAGE_FILE_PATH, 'wb') as f:
156 |             f.write(requests.get(entire_captcha_url).content)
157 |         logger.debug(
158 |             f'saved entire captcha to {CAPTCHA_ENTIRE_IMAGE_FILE_PATH}')
159 |         resized_entire_captcha_base64_string = resize_base64_image(
160 |             CAPTCHA_ENTIRE_IMAGE_FILE_PATH, (self.entire_captcha_natural_width,
161 |                                              self.entire_captcha_natural_width))
162 |         logger.debug(
163 |             f'resized_entire_captcha_base64_string, {resized_entire_captcha_base64_string[0:100]}...')
164 |         entire_captcha_recognize_result = self.captcha_resolver.create_task(
165 |             resized_entire_captcha_base64_string,
166 |             get_question_id_by_target_name(self.captcha_target_name)
167 |         )
168 |         if not entire_captcha_recognize_result:
169 |             logger.error('count not get captcha recognize result')
170 |             return
171 |         recognized_indices = entire_captcha_recognize_result.get(
172 |             'solution', {}).get('objects')
173 |         if not recognized_indices:
174 |             logger.error('count not get captcha recognized indices')
175 |             return
176 |         single_captcha_elements = self.wait.until(EC.visibility_of_all_elements_located(
177 |             (By.CSS_SELECTOR, '#rc-imageselect-target table td')))
178 |         for recognized_index in recognized_indices:
179 |             single_captcha_element: WebElement = single_captcha_elements[recognized_index]
180 |             single_captcha_element.click()
181 |             # check if need verify single captcha
182 |             self.verify_single_captcha(recognized_index)
183 | 
184 |         # after all captcha clicked
185 |         verify_button: WebElement = self.get_verify_button()
186 |         if verify_button.is_displayed:
187 |             verify_button.click()
188 |             time.sleep(3)
189 | 
190 |         is_succeed = self.get_is_successful()
191 |         if is_succeed:
192 |             logger.debug('verifed successfully')
193 |         else:
194 |             verify_error_info = self.get_verify_error_info()
195 |             logger.debug(f'verify_error_info {verify_error_info}')
196 |             self.verify_entire_captcha()
197 | 
198 |     def resolve(self):
199 |         self.trigger_captcha()
200 |         self.verify_entire_captcha()
201 | 


--------------------------------------------------------------------------------
/app/utils.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import base64
 3 | from loguru import logger
 4 | from app.settings import CAPTCHA_RESIZED_IMAGE_FILE_PATH, CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING
 5 | 
 6 | 
 7 | def resize_base64_image(filename, size):
 8 |     width, height = size
 9 |     img = Image.open(filename)
10 |     new_img = img.resize((width, height))
11 |     new_img.save(CAPTCHA_RESIZED_IMAGE_FILE_PATH)
12 |     with open(CAPTCHA_RESIZED_IMAGE_FILE_PATH, "rb") as f:
13 |         data = f.read()
14 |         encoded_string = base64.b64encode(data)
15 |         return encoded_string.decode('utf-8')
16 | 
17 | 
18 | def get_question_id_by_target_name(target_name):
19 |     logger.debug(f'try to get question id by {target_name}')
20 |     question_id = CAPTCHA_TARGET_NAME_QUESTION_ID_MAPPING.get(target_name)
21 |     logger.debug(f'question_id {question_id}')
22 |     return question_id
23 | 


--------------------------------------------------------------------------------
/captcha_entire_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_entire_image.png


--------------------------------------------------------------------------------
/captcha_resized_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_resized_image.png


--------------------------------------------------------------------------------
/captcha_single_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/captcha_single_image.png


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | 
2 | from app.settings import CAPTCHA_DEMO_URL
3 | from app.solution import Solution
4 | 
5 | 
6 | if __name__ == '__main__':  # script entry point (skipped when imported)
7 |     Solution(CAPTCHA_DEMO_URL).resolve()  # open the demo page and solve its reCAPTCHA
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | environs
2 | requests
3 | pillow
4 | selenium
5 | loguru


--------------------------------------------------------------------------------
/resized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Python3WebSpider/RecaptchaResolver/55430eac27863870ace8fb126806785b95ab249f/resized.png


--------------------------------------------------------------------------------