├── app
    ├── __init__.py
    ├── settings.py
    ├── utils.py
    ├── captcha_resolver.py
    └── solution.py
├── .gitignore
├── requirements.txt
├── main.py
└── README.md


/app/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .env
3 | *.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | environs
2 | requests
3 | pillow
4 | selenium
5 | loguru


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | 
2 | from app.settings import CAPTCHA_DEMO_URL
3 | from app.solution import Solution
4 | 
5 | 
if __name__ == "__main__":
    # Build the solver for the demo page and run it in a single expression,
    # so no module-level name keeps the Solution instance alive afterwards.
    Solution(url=CAPTCHA_DEMO_URL).resolve()
8 | 


--------------------------------------------------------------------------------
/app/settings.py:
--------------------------------------------------------------------------------
 1 | from environs import Env
 2 | 
 3 | env = Env()
 4 | env.read_env()
 5 | 
 6 | CAPTCHA_RESOLVER_API_URL = 'https://api.yescaptcha.com/createTask'
 7 | CAPTCHA_RESOLVER_API_KEY = env.str('CAPTCHA_RESOLVER_API_KEY')
 8 | 
 9 | CAPTCHA_DEMO_URL = 'https://democaptcha.com/demo-form-eng/hcaptcha.html'
10 | 
11 | CAPTCHA_ENTIRE_IMAGE_FILE_PATH = 'captcha_entire_image.png'
12 | CAPTCHA_SINGLE_IMAGE_FILE_PATH = 'captcha_single_image_%s.png'
13 | CAPTCHA_RESIZED_IMAGE_FILE_PATH = 'captcha_resized_image.png'
14 | 


--------------------------------------------------------------------------------
/app/utils.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import base64
 3 | from app.settings import CAPTCHA_RESIZED_IMAGE_FILE_PATH
 4 | 
 5 | 
 6 | def resize_base64_image(filename, size):
 7 |     width, height = size
 8 |     img = Image.open(filename)
 9 |     new_img = img.resize((width, height))
10 |     new_img.save(CAPTCHA_RESIZED_IMAGE_FILE_PATH)
11 |     with open(CAPTCHA_RESIZED_IMAGE_FILE_PATH, "rb") as f:
12 |         data = f.read()
13 |         encoded_string = base64.b64encode(data)
14 |         return encoded_string.decode('utf-8')
15 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HCaptchaResolver
 2 | 
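 3 | Solves hCaptcha image challenges by driving Chrome with Selenium and classifying the images through the YesCaptcha API.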
 4 | 
 5 | ## Usage
 6 | 
 7 | Clone this repo:
 8 | 
 9 | ```
10 | git clone https://github.com/Python3WebSpider/HCaptchaResolver.git
11 | ```
12 | 
13 | Then go to https://yescaptcha.com/i/CnZPBu, register an account, and get a `clientKey` from the portal.
14 | 
15 | ![image](https://user-images.githubusercontent.com/8678661/170099424-bbe53c64-79b5-46fc-a7c9-95fc88877e3d.png)
16 | 
17 | Then create a `.env` file in the root of this repo with the following content:
18 | 
19 | ```
20 | CAPTCHA_RESOLVER_API_KEY=<Your Client Key>
21 | ```
22 | 
23 | Next, install the required packages:
24 | 
25 | ```
26 | pip3 install -r requirements.txt
27 | ```
28 | 
29 | Finally, run the demo:
30 | 
31 | ```
32 | python3 main.py
33 | ```
34 | 
35 | Result:
36 | 
37 | ![image](https://user-images.githubusercontent.com/8678661/170533027-1f06daf2-ee73-4800-948e-851dc9f6a648.png)
38 | 


--------------------------------------------------------------------------------
/app/captcha_resolver.py:
--------------------------------------------------------------------------------
 1 | from loguru import logger
 2 | from app.settings import CAPTCHA_RESOLVER_API_KEY, CAPTCHA_RESOLVER_API_URL
 3 | import requests
 4 | 
 5 | 
 6 | class CaptchaResolver(object):
 7 | 
 8 |     def __init__(self, api_url=CAPTCHA_RESOLVER_API_URL, api_key=CAPTCHA_RESOLVER_API_KEY):
 9 |         self.api_url = api_url
10 |         self.api_key = api_key
11 | 
12 |     def create_task(self, queries, question):
13 |         logger.debug(f'start to recognize image for question {question}')
14 |         data = {
15 |             "clientKey": self.api_key,
16 |             "task": {
17 |                 "type": "HCaptchaClassification",
18 |                 "queries": queries,
19 |                 "question": question
20 |             },
21 |             "softID": 78
22 |         }
23 |         try:
24 |             response = requests.post(self.api_url, json=data)
25 |             result = response.json()
26 |             logger.debug(f'captcha recogize result {result}')
27 |             return result
28 |         except requests.RequestException:
29 |             logger.exception(
30 |                 'error occurred while recognizing captcha', exc_info=True)
31 | 


--------------------------------------------------------------------------------
/app/solution.py:
--------------------------------------------------------------------------------
  1 | from random import random
  2 | import re
  3 | from typing import List, Union
  4 | import requests
  5 | from selenium import webdriver
  6 | from selenium.webdriver.common.by import By
  7 | from selenium.webdriver.support import expected_conditions as EC
  8 | from selenium.webdriver.support.wait import WebDriverWait
  9 | from selenium.webdriver.remote.webelement import WebElement
 10 | import time
 11 | from loguru import logger
 12 | from app.captcha_resolver import CaptchaResolver
 13 | from app.settings import CAPTCHA_SINGLE_IMAGE_FILE_PATH
 14 | from app.utils import resize_base64_image
 15 | 
 16 | 
 17 | class Solution(object):
 18 |     def __init__(self, url):
 19 |         self.browser = webdriver.Chrome()
 20 |         self.browser.get(url)
 21 |         self.wait = WebDriverWait(self.browser, 10)
 22 |         self.captcha_resolver = CaptchaResolver()
 23 | 
 24 |     def __del__(self):
 25 |         time.sleep(10)
 26 |         self.browser.close()
 27 | 
 28 |     def get_captcha_entry_iframe(self) -> WebElement:
 29 |         self.browser.switch_to.default_content()
 30 |         captcha_entry_iframe = self.browser.find_element_by_css_selector(
 31 |             '.h-captcha > iframe')
 32 |         return captcha_entry_iframe
 33 | 
 34 |     def switch_to_captcha_entry_iframe(self) -> None:
 35 |         captcha_entry_iframe: WebElement = self.get_captcha_entry_iframe()
 36 |         self.browser.switch_to.frame(captcha_entry_iframe)
 37 | 
 38 |     def get_captcha_content_iframe(self) -> WebElement:
 39 |         self.browser.switch_to.default_content()
 40 |         captcha_content_iframe = self.browser.find_element_by_xpath(
 41 |             '//iframe[contains(@title, "Main content")]')
 42 |         return captcha_content_iframe
 43 | 
 44 |     def switch_to_captcha_content_iframe(self) -> None:
 45 |         captcha_content_iframe: WebElement = self.get_captcha_content_iframe()
 46 |         self.browser.switch_to.frame(captcha_content_iframe)
 47 | 
 48 |     def get_captcha_element(self) -> WebElement:
 49 |         captcha_element: WebElement = self.wait.until(EC.element_to_be_clickable(
 50 |             (By.CSS_SELECTOR, '.task-grid')))
 51 |         return captcha_element
 52 | 
 53 |     def trigger_captcha(self) -> None:
 54 |         self.switch_to_captcha_entry_iframe()
 55 |         captcha_entry = self.wait.until(EC.presence_of_element_located(
 56 |             (By.CSS_SELECTOR, '#anchor #checkbox')))
 57 |         captcha_entry.click()
 58 |         time.sleep(2)
 59 |         self.switch_to_captcha_content_iframe()
 60 |         captcha_element: WebElement = self.get_captcha_element()
 61 |         if captcha_element.is_displayed:
 62 |             logger.debug('trigged captcha successfully')
 63 | 
 64 |     def get_captcha_target_text(self) -> WebElement:
 65 |         captcha_target_name_element: WebElement = self.wait.until(EC.presence_of_element_located(
 66 |             (By.CSS_SELECTOR, '.prompt-text')))
 67 |         return captcha_target_name_element.text
 68 | 
 69 |     def get_verify_button(self) -> WebElement:
 70 |         verify_button = self.wait.until(EC.presence_of_element_located(
 71 |             (By.CSS_SELECTOR, '.button-submit')))
 72 |         return verify_button
 73 | 
 74 |     def get_is_successful(self):
 75 |         self.switch_to_captcha_entry_iframe()
 76 |         anchor: WebElement = self.wait.until(EC.visibility_of_element_located((
 77 |             By.CSS_SELECTOR, '#anchor #checkbox'
 78 |         )))
 79 |         checked = anchor.get_attribute('aria-checked')
 80 |         logger.debug(f'checked {checked}')
 81 |         return str(checked) == 'true'
 82 | 
 83 |     def verify_captcha(self):
 84 |         # get target text
 85 |         self.captcha_target_text = self.get_captcha_target_text()
 86 |         logger.debug(
 87 |             f'captcha_target_text {self.captcha_target_text}'
 88 |         )
 89 |         # extract all images
 90 |         single_captcha_elements = self.wait.until(EC.visibility_of_all_elements_located(
 91 |             (By.CSS_SELECTOR, '.task-image .image-wrapper .image')))
 92 |         resized_single_captcha_base64_strings = []
 93 |         for i, single_captcha_element in enumerate(single_captcha_elements):
 94 |             single_captcha_element_style = single_captcha_element.get_attribute(
 95 |                 'style')
 96 |             pattern = re.compile('url\("(https.*?)"\)')
 97 |             match_result = re.search(pattern, single_captcha_element_style)
 98 |             single_captcha_element_url = match_result.group(
 99 |                 1) if match_result else None
100 |             logger.debug(
101 |                 f'single_captcha_element_url {single_captcha_element_url}')
102 |             with open(CAPTCHA_SINGLE_IMAGE_FILE_PATH % (i,), 'wb') as f:
103 |                 f.write(requests.get(single_captcha_element_url).content)
104 |             resized_single_captcha_base64_string = resize_base64_image(
105 |                 CAPTCHA_SINGLE_IMAGE_FILE_PATH % (i,), (100, 100))
106 |             resized_single_captcha_base64_strings.append(
107 |                 resized_single_captcha_base64_string)
108 | 
109 |         logger.debug(
110 |             f'length of single_captcha_element_urls {len(resized_single_captcha_base64_strings)}')
111 | 
112 |         # try to verify using API
113 |         captcha_recognize_result = self.captcha_resolver.create_task(
114 |             resized_single_captcha_base64_strings,
115 |             self.captcha_target_text
116 |         )
117 |         if not captcha_recognize_result:
118 |             logger.error('count not get captcha recognize result')
119 |             return
120 |         recognized_results = captcha_recognize_result.get(
121 |             'solution', {}).get('objects')
122 | 
123 |         if not recognized_results:
124 |             logger.error('count not get captcha recognized indices')
125 |             return
126 | 
127 |         # click captchas
128 |         recognized_indices = [i for i, x in enumerate(recognized_results) if x]
129 |         logger.debug(f'recognized_indices {recognized_indices}')
130 |         click_targets = self.wait.until(EC.visibility_of_all_elements_located(
131 |             (By.CSS_SELECTOR, '.task-image')))
132 |         for recognized_index in recognized_indices:
133 |             click_target: WebElement = click_targets[recognized_index]
134 |             click_target.click()
135 |             time.sleep(random())
136 | 
137 |         # after all captcha clicked
138 |         verify_button: WebElement = self.get_verify_button()
139 |         if verify_button.is_displayed:
140 |             verify_button.click()
141 |             time.sleep(3)
142 | 
143 |         # check if succeed
144 |         is_succeed = self.get_is_successful()
145 |         if is_succeed:
146 |             logger.debug('verifed successfully')
147 |         else:
148 |             self.verify_captcha()
149 | 
150 |     def resolve(self):
151 |         self.trigger_captcha()
152 |         self.verify_captcha()
153 | 


--------------------------------------------------------------------------------