├── app ├── __init__.py ├── settings.py ├── utils.py ├── captcha_resolver.py └── solution.py ├── .gitignore ├── requirements.txt ├── main.py └── README.md /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .env 3 | *.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | environs 2 | requests 3 | pillow 4 | selenium 5 | loguru -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | from app.settings import CAPTCHA_DEMO_URL 3 | from app.solution import Solution 4 | 5 | 6 | if __name__ == '__main__': 7 | Solution(CAPTCHA_DEMO_URL).resolve() 8 | -------------------------------------------------------------------------------- /app/settings.py: -------------------------------------------------------------------------------- 1 | from environs import Env 2 | 3 | env = Env() 4 | env.read_env() 5 | 6 | CAPTCHA_RESOLVER_API_URL = 'https://api.yescaptcha.com/createTask' 7 | CAPTCHA_RESOLVER_API_KEY = env.str('CAPTCHA_RESOLVER_API_KEY') 8 | 9 | CAPTCHA_DEMO_URL = 'https://democaptcha.com/demo-form-eng/hcaptcha.html' 10 | 11 | CAPTCHA_ENTIRE_IMAGE_FILE_PATH = 'captcha_entire_image.png' 12 | CAPTCHA_SINGLE_IMAGE_FILE_PATH = 'captcha_single_image_%s.png' 13 | CAPTCHA_RESIZED_IMAGE_FILE_PATH = 'captcha_resized_image.png' 14 | -------------------------------------------------------------------------------- /app/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import base64 3 | from app.settings import CAPTCHA_RESIZED_IMAGE_FILE_PATH 4 | 5 | 6 | def resize_base64_image(filename, size): 7 | width, height = size 8 | img = Image.open(filename) 9 | new_img = img.resize((width, height)) 10 | new_img.save(CAPTCHA_RESIZED_IMAGE_FILE_PATH) 11 | with open(CAPTCHA_RESIZED_IMAGE_FILE_PATH, "rb") as f: 12 | data = f.read() 13 | encoded_string = base64.b64encode(data) 14 | return encoded_string.decode('utf-8') 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HCaptchaResolver 2 | 3 | HCaptcha Resolver 4 | 5 | ## Usage 6 | 7 | Clone this repo: 8 | 9 | ``` 10 | git clone https://github.com/Python3WebSpider/HCaptchaResolver.git 11 | ``` 12 | 13 | Then go to https://yescaptcha.com/i/CnZPBu and register your account, then get a `clientKey` from portal. 14 | 15 | ![image](https://user-images.githubusercontent.com/8678661/170099424-bbe53c64-79b5-46fc-a7c9-95fc88877e3d.png) 16 | 17 | Then create a `.env` file in root of this repo, and write this content: 18 | 19 | ``` 20 | CAPTCHA_RESOLVER_API_KEY= 21 | ``` 22 | 23 | Next, you need to install packages: 24 | 25 | ``` 26 | pip3 install -r requirements.txt 27 | ``` 28 | 29 | At last, run demo: 30 | 31 | ``` 32 | python3 main.py 33 | ``` 34 | 35 | Result: 36 | 37 | ![image](https://user-images.githubusercontent.com/8678661/170533027-1f06daf2-ee73-4800-948e-851dc9f6a648.png) 38 | -------------------------------------------------------------------------------- /app/captcha_resolver.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | from app.settings import CAPTCHA_RESOLVER_API_KEY, CAPTCHA_RESOLVER_API_URL 3 | import requests 4 | 5 | 6 | class CaptchaResolver(object): 7 | 8 | def __init__(self, api_url=CAPTCHA_RESOLVER_API_URL, api_key=CAPTCHA_RESOLVER_API_KEY): 9 | self.api_url = api_url 10 | self.api_key = api_key 11 | 12 | def create_task(self, queries, question): 13 | logger.debug(f'start to recognize image for question {question}') 14 | data = { 15 | "clientKey": self.api_key, 16 | "task": { 17 | "type": "HCaptchaClassification", 18 | "queries": queries, 19 | "question": question 20 | }, 21 | "softID": 78 22 | } 23 | try: 24 | response = requests.post(self.api_url, json=data) 25 | result = response.json() 26 | logger.debug(f'captcha recogize result {result}') 27 | return result 28 | except requests.RequestException: 29 | logger.exception( 30 | 'error occurred while recognizing captcha', exc_info=True) 31 | -------------------------------------------------------------------------------- /app/solution.py: -------------------------------------------------------------------------------- 1 | from random import random 2 | import re 3 | from typing import List, Union 4 | import requests 5 | from selenium import webdriver 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.support import expected_conditions as EC 8 | from selenium.webdriver.support.wait import WebDriverWait 9 | from selenium.webdriver.remote.webelement import WebElement 10 | import time 11 | from loguru import logger 12 | from app.captcha_resolver import CaptchaResolver 13 | from app.settings import CAPTCHA_SINGLE_IMAGE_FILE_PATH 14 | from app.utils import resize_base64_image 15 | 16 | 17 | class Solution(object): 18 | def __init__(self, url): 19 | self.browser = webdriver.Chrome() 20 | self.browser.get(url) 21 | self.wait = WebDriverWait(self.browser, 10) 22 | self.captcha_resolver = CaptchaResolver() 23 | 24 | def __del__(self): 25 | time.sleep(10) 26 | self.browser.close() 27 | 28 | def get_captcha_entry_iframe(self) -> WebElement: 29 | self.browser.switch_to.default_content() 30 | captcha_entry_iframe = self.browser.find_element_by_css_selector( 31 | '.h-captcha > iframe') 32 | return captcha_entry_iframe 33 | 34 | def switch_to_captcha_entry_iframe(self) -> None: 35 | captcha_entry_iframe: WebElement = self.get_captcha_entry_iframe() 36 | self.browser.switch_to.frame(captcha_entry_iframe) 37 | 38 | def get_captcha_content_iframe(self) -> WebElement: 39 | self.browser.switch_to.default_content() 40 | captcha_content_iframe = self.browser.find_element_by_xpath( 41 | '//iframe[contains(@title, "Main content")]') 42 | return captcha_content_iframe 43 | 44 | def switch_to_captcha_content_iframe(self) -> None: 45 | captcha_content_iframe: WebElement = self.get_captcha_content_iframe() 46 | self.browser.switch_to.frame(captcha_content_iframe) 47 | 48 | def get_captcha_element(self) -> WebElement: 49 | captcha_element: WebElement = self.wait.until(EC.element_to_be_clickable( 50 | (By.CSS_SELECTOR, '.task-grid'))) 51 | return captcha_element 52 | 53 | def trigger_captcha(self) -> None: 54 | self.switch_to_captcha_entry_iframe() 55 | captcha_entry = self.wait.until(EC.presence_of_element_located( 56 | (By.CSS_SELECTOR, '#anchor #checkbox'))) 57 | captcha_entry.click() 58 | time.sleep(2) 59 | self.switch_to_captcha_content_iframe() 60 | captcha_element: WebElement = self.get_captcha_element() 61 | if captcha_element.is_displayed: 62 | logger.debug('trigged captcha successfully') 63 | 64 | def get_captcha_target_text(self) -> WebElement: 65 | captcha_target_name_element: WebElement = self.wait.until(EC.presence_of_element_located( 66 | (By.CSS_SELECTOR, '.prompt-text'))) 67 | return captcha_target_name_element.text 68 | 69 | def get_verify_button(self) -> WebElement: 70 | verify_button = self.wait.until(EC.presence_of_element_located( 71 | (By.CSS_SELECTOR, '.button-submit'))) 72 | return verify_button 73 | 74 | def get_is_successful(self): 75 | self.switch_to_captcha_entry_iframe() 76 | anchor: WebElement = self.wait.until(EC.visibility_of_element_located(( 77 | By.CSS_SELECTOR, '#anchor #checkbox' 78 | ))) 79 | checked = anchor.get_attribute('aria-checked') 80 | logger.debug(f'checked {checked}') 81 | return str(checked) == 'true' 82 | 83 | def verify_captcha(self): 84 | # get target text 85 | self.captcha_target_text = self.get_captcha_target_text() 86 | logger.debug( 87 | f'captcha_target_text {self.captcha_target_text}' 88 | ) 89 | # extract all images 90 | single_captcha_elements = self.wait.until(EC.visibility_of_all_elements_located( 91 | (By.CSS_SELECTOR, '.task-image .image-wrapper .image'))) 92 | resized_single_captcha_base64_strings = [] 93 | for i, single_captcha_element in enumerate(single_captcha_elements): 94 | single_captcha_element_style = single_captcha_element.get_attribute( 95 | 'style') 96 | pattern = re.compile('url\("(https.*?)"\)') 97 | match_result = re.search(pattern, single_captcha_element_style) 98 | single_captcha_element_url = match_result.group( 99 | 1) if match_result else None 100 | logger.debug( 101 | f'single_captcha_element_url {single_captcha_element_url}') 102 | with open(CAPTCHA_SINGLE_IMAGE_FILE_PATH % (i,), 'wb') as f: 103 | f.write(requests.get(single_captcha_element_url).content) 104 | resized_single_captcha_base64_string = resize_base64_image( 105 | CAPTCHA_SINGLE_IMAGE_FILE_PATH % (i,), (100, 100)) 106 | resized_single_captcha_base64_strings.append( 107 | resized_single_captcha_base64_string) 108 | 109 | logger.debug( 110 | f'length of single_captcha_element_urls {len(resized_single_captcha_base64_strings)}') 111 | 112 | # try to verify using API 113 | captcha_recognize_result = self.captcha_resolver.create_task( 114 | resized_single_captcha_base64_strings, 115 | self.captcha_target_text 116 | ) 117 | if not captcha_recognize_result: 118 | logger.error('count not get captcha recognize result') 119 | return 120 | recognized_results = captcha_recognize_result.get( 121 | 'solution', {}).get('objects') 122 | 123 | if not recognized_results: 124 | logger.error('count not get captcha recognized indices') 125 | return 126 | 127 | # click captchas 128 | recognized_indices = [i for i, x in enumerate(recognized_results) if x] 129 | logger.debug(f'recognized_indices {recognized_indices}') 130 | click_targets = self.wait.until(EC.visibility_of_all_elements_located( 131 | (By.CSS_SELECTOR, '.task-image'))) 132 | for recognized_index in recognized_indices: 133 | click_target: WebElement = click_targets[recognized_index] 134 | click_target.click() 135 | time.sleep(random()) 136 | 137 | # after all captcha clicked 138 | verify_button: WebElement = self.get_verify_button() 139 | if verify_button.is_displayed: 140 | verify_button.click() 141 | time.sleep(3) 142 | 143 | # check if succeed 144 | is_succeed = self.get_is_successful() 145 | if is_succeed: 146 | logger.debug('verifed successfully') 147 | else: 148 | self.verify_captcha() 149 | 150 | def resolve(self): 151 | self.trigger_captcha() 152 | self.verify_captcha() 153 | --------------------------------------------------------------------------------