├── .gitignore ├── LICENSE ├── README.md ├── hcaptcha ├── __init__.py ├── agents │ ├── __init__.py │ ├── base.py │ └── chrome.py ├── challenges.py ├── constants.py ├── curves │ ├── __init__.py │ ├── _beziercurve.py │ ├── _utils.py │ └── humancurve.py ├── exceptions.py ├── models.py ├── proofs │ ├── __init__.py │ ├── hsl.py │ └── hsw.py ├── startup.py ├── structures.py └── utils.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | 116 | js 117 | hcaptcha-js 118 | test.py 119 | example.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 h0nda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do 
so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # py-hcaptcha 2 | An unofficial wrapper for interacting with hCaptcha challenges. 3 | 4 | * Not an automatic solver (yet). 5 | * Device must be running Windows with Google Chrome installed. 6 | * Some parts of the code need to be changed frequently and therefore may be written in a sloppy way. 
class Agent:
    """Interface for the browser-like client a challenge impersonates.

    Every method here is a stub that returns ``None``; concrete agents
    (``ChromeAgent`` in this package) override them. The stubs are kept
    non-raising so that existing code instantiating ``Agent`` directly keeps
    working.
    """

    # Value sent as the User-Agent header by concrete agents.
    user_agent: str
    # Maps lower-cased header names to a sort rank used when ordering headers.
    header_order: dict

    def __init__(self):
        # Accumulated clock offset; ChromeAgent interprets it as milliseconds
        # added on top of the real time.
        self._epoch_delta = 0

    def get_screen_properties(self) -> dict:
        """Return the fake ``window.screen`` properties (stub: ``None``)."""
        return None

    def get_navigator_properties(self) -> dict:
        """Return the fake ``window.navigator`` properties (stub: ``None``)."""
        return None

    def epoch(self, ms: bool = True) -> float:
        """Return the agent's current (shifted) time (stub: ``None``)."""
        return None

    def epoch_travel(self, delta: float, ms: bool = True):
        """Shift the agent's clock by ``delta`` (stub: does nothing)."""
        return None

    def epoch_wait(self):
        """Wait until real time catches up with the shifted clock (stub)."""
        return None

    def json_encode(self, data: Literal):
        """Serialize ``data`` to JSON the way the agent would (stub: ``None``)."""
        return None

    def url_encode(self, data: dict) -> str:
        """Serialize ``data`` as a URL-encoded form body (stub: ``None``)."""
        return None

    def format_headers(
        self,
        url: str,
        headers: dict = None,
        origin_url: str = None,
        sec_site: str = "cross-site",
        sec_mode: str = "cors",
        sec_dest: str = "empty",
        *,
        body: bytes = None
    ) -> dict:
        """Build the ordered request headers for ``url`` (stub: ``None``).

        ``headers`` defaults to ``None`` rather than a shared ``{}`` so that an
        implementation can never leak state between calls through the default.

        ``body`` is accepted (keyword-only, so the existing positional order
        is untouched) because ``Challenge._request`` always passes
        ``body=...`` and ``ChromeAgent.format_headers`` takes it; without it
        any agent relying on this base signature would fail with a TypeError.
        """
        return None
"availWidth": self.avail_screen_size[0], 49 | "availHeight": self.avail_screen_size[1], 50 | "width": self.screen_size[0], 51 | "height": self.screen_size[1], 52 | "colorDepth": 24, 53 | "pixelDepth": 24, 54 | "availLeft": 0, 55 | "availTop": 0 56 | } 57 | 58 | def get_navigator_properties(self): 59 | return { 60 | "vendorSub": "", 61 | "productSub": "20030107", 62 | "vendor": "Google Inc.", 63 | "maxTouchPoints": 0, 64 | "userActivation": {}, 65 | "doNotTrack": "1", 66 | "geolocation": {}, 67 | "connection": {}, 68 | "webkitTemporaryStorage": {}, 69 | "webkitPersistentStorage": {}, 70 | "hardwareConcurrency": self.cpu_count, 71 | "cookieEnabled": True, 72 | "appCodeName": "Mozilla", 73 | "appName": "Netscape", 74 | "appVersion": f"5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{self.chrome_version} Safari/537.36", 75 | "platform": "Win32", 76 | "product": "Gecko", 77 | "userAgent": f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{self.chrome_version} Safari/537.36", 78 | "language": "en-US", 79 | "languages": ["en-US"], 80 | "onLine": True, 81 | "webdriver": False, 82 | "pdfViewerEnabled": True, 83 | "scheduling": {}, 84 | "bluetooth": {}, 85 | "clipboard": {}, 86 | "credentials": {}, 87 | "keyboard": {}, 88 | "managed": {}, 89 | "mediaDevices": {}, 90 | "storage": {}, 91 | "serviceWorker": {}, 92 | "wakeLock": {}, 93 | "deviceMemory": self.memory_gb, 94 | "ink": {}, 95 | "hid": {}, 96 | "locks": {}, 97 | "mediaCapabilities": {}, 98 | "mediaSession": {}, 99 | "permissions": {}, 100 | "presentation": {}, 101 | "serial": {}, 102 | "virtualKeyboard": {}, 103 | "usb": {}, 104 | "xr": {}, 105 | "userAgentData": { 106 | "brands": [ 107 | {"brand": "Chromium", "version": self.chrome_version.split(".", 1)[0]}, 108 | {"brand": "Google Chrome", "version": self.chrome_version.split(".", 1)[0]}, 109 | {"brand": ";Not A Brand", "version": "99"} 110 | ], 111 | "mobile": False 112 | }, 113 | "plugins": [ 
def format_headers(
    self,
    url: str,
    body: bytes = None,
    headers: dict = None,
    origin_url: str = None,
    sec_site: str = "cross-site",
    sec_mode: str = "cors",
    sec_dest: str = "empty"
) -> dict:
    """Return Chrome-like request headers for ``url`` in Chrome's order.

    Args:
        url: Target URL; its hostname becomes the ``Host`` header.
        body: Request body, if any; sets ``Content-Length``.
        headers: Extra headers supplied by the caller. The mapping is
            copied, never modified.
        origin_url: Page the request is made from; used to derive the
            ``Origin`` / ``Referer`` headers.
        sec_site, sec_mode, sec_dest: Values for the ``Sec-Fetch-*`` headers.

    Returns:
        A new dict sorted by ``self.header_order`` (headers that are not
        listed there sort last, keeping their relative order).
    """
    # Work on a private copy: the original mutated the (shared, mutable)
    # default dict and the caller's dict, so headers such as Content-Length
    # leaked from one request into the next.
    headers = dict(headers) if headers else {}

    p_url = urlsplit(url)
    p_origin_url = urlsplit(origin_url) if origin_url else None
    major_version = self.chrome_version.split(".", 1)[0]

    headers["Host"] = p_url.hostname
    headers["Connection"] = "keep-alive"
    headers["sec-ch-ua"] = (
        f'"Chromium";v="{major_version}", '
        f'"Google Chrome";v="{major_version}", '
        '";Not A Brand";v="99"'
    )
    headers["sec-ch-ua-mobile"] = "?0"
    headers["User-Agent"] = self.user_agent
    headers["sec-ch-ua-platform"] = '"Windows"'
    headers.setdefault("Accept", "*/*")
    headers["Accept-Encoding"] = "gzip, deflate, br"
    headers["Accept-Language"] = "en-US,en;q=0.9"

    if body is not None:
        headers["Content-Length"] = str(len(body))

    headers["Sec-Fetch-Site"] = sec_site
    headers["Sec-Fetch-Mode"] = sec_mode
    headers["Sec-Fetch-Dest"] = sec_dest

    if p_origin_url is not None:
        origin = p_origin_url.scheme + "://" + p_origin_url.hostname
        query = ("?" + p_origin_url.query) if p_origin_url.query else ""
        full_referer = origin + p_origin_url.path + query

    if sec_mode == "navigate":
        headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
        if sec_site == "same-origin" and origin_url:
            headers["Referer"] = full_referer
        elif origin_url:
            # NOTE(review): this appends "/" to the path, so an origin path
            # of "/" yields "//" - kept as in the original; confirm intent.
            headers["Referer"] = origin + p_origin_url.path + "/"

    elif sec_mode in ("cors", "no-cors") and origin_url:
        # Both modes produced exactly the same headers in the original.
        headers["Origin"] = origin
        headers["Referer"] = full_referer

    # sorted() is stable, so unknown headers (rank 9999) keep insertion order.
    return dict(sorted(
        headers.items(),
        key=lambda item: self.header_order.get(item[0].lower(), 9999)
    ))
def submit(self) -> str:
    """Submit the recorded answers and return the pass token.

    If a token is already known it is returned without any network
    traffic. Otherwise the mouse movement for the selected tiles is
    simulated, the agent waits out its clock offset, and the answers are
    posted to the ``checkcaptcha`` endpoint.

    Returns:
        The ``generated_pass_UUID`` value from the response.

    Raises:
        RequestRejected: If the response does not contain a truthy ``pass``.
    """
    if self.token:
        return self.token

    self._simulate_solve()
    self._agent.epoch_wait()

    try:
        data = self._request(
            method="POST",
            url=f"https://hcaptcha.com/checkcaptcha/{self.id}"
                f"?s={self._site_key}",
            headers={
                "Accept": "*/*",
                "Content-type": "application/json;charset=UTF-8"
            },
            body=self._agent.json_encode({
                "v": self._version_id,
                "job_mode": self.mode,
                "answers": {
                    tile.id: "true" if tile in self._answers else "false"
                    for tile in self.tiles
                },
                "serverdomain": self._site_hostname,
                "sitekey": self._site_key,
                "motionData": self._agent.json_encode({
                    **self._frame.get_data(),
                    "topLevel": self._top.get_data(),
                    "v": 1
                }),
                "n": self._get_proof(),
                "c": self._agent.json_encode(self._spec)
            }),
            origin_url="https://newassets.hcaptcha.com/",
            sec_site="same-site",
            sec_mode="cors",
            sec_dest="empty"
        )
    finally:
        # Release the sockets even when the request raises; the original
        # only closed them after a successful round trip.
        self.close()

    if not data.get("pass"):
        raise RequestRejected("Submit request was rejected.")

    self.token = data["generated_pass_UUID"]
    return self.token
def _simulate_solve(self):
    """Record the mouse events of a user clicking the answered tiles.

    Tiles are processed page by page (``TILES_PER_PAGE`` per page). For
    every answered tile on a page a mouse path is generated from the
    current cursor position to a random point inside the tile, followed by
    a mouse-down / mouse-up pair; after each page the same is done for the
    verify/next/skip button. All events go to ``self._frame``.
    """
    boundaries = dict(
        offsetBoundaryX=0, offsetBoundaryY=0, leftBoundary=0,
        rightBoundary=FRAME_SIZE[0], upBoundary=FRAME_SIZE[1],
        downBoundary=0)

    def click(start, target):
        """Move from ``start`` to ``target``, click there, return ``target``."""
        event = None
        for event in gen_mouse_move(start, target, self._agent, **boundaries):
            self._frame.record_event("mm", event)
        # TODO: add time delay for mouse down and mouse up
        if event is not None:
            self._frame.record_event("md", event)
            self._frame.record_event("mu", event)
        return target

    # Ceiling division: the original floored the page count, so tiles on a
    # trailing partial page were never visited.
    total_pages = max(1, -(-len(self.tiles) // TILES_PER_PAGE))
    cursor_pos = (
        randint(1, 5),
        randint(300, 350)
    )

    for page in range(total_pages):
        page_tiles = self.tiles[page * TILES_PER_PAGE : (page + 1) * TILES_PER_PAGE]
        for tile in page_tiles:
            if tile not in self._answers:
                continue
            # Grid slot relative to the current page; using the absolute
            # index put tiles of later pages below the bottom of the frame.
            slot = tile.index % TILES_PER_PAGE
            column = slot % TILES_PER_ROW
            row = slot // TILES_PER_ROW
            tile_pos = (
                TILE_IMAGE_SIZE[0] * column
                + TILE_IMAGE_PADDING[0] * column
                + randint(10, TILE_IMAGE_SIZE[0])
                + TILE_IMAGE_START_POS[0],
                TILE_IMAGE_SIZE[1] * row
                + TILE_IMAGE_PADDING[1] * row
                + randint(10, TILE_IMAGE_SIZE[1])
                + TILE_IMAGE_START_POS[1],
            )
            cursor_pos = click(cursor_pos, tile_pos)

        # click verify/next/skip btn
        btn_pos = (
            VERIFY_BTN_POS[0] + randint(5, 50),
            VERIFY_BTN_POS[1] + randint(5, 15),
        )
        cursor_pos = click(cursor_pos, btn_pos)
data["requester_question"] 248 | self.tiles = [ 249 | Tile(id=info["task_key"], 250 | image_url=info["datapoint_uri"], 251 | index=index, 252 | challenge=self) 253 | for index, info in enumerate(data["tasklist"]) 254 | ] 255 | 256 | def _get_tile_image(self, image_url): 257 | data = self._request( 258 | method="GET", 259 | url=image_url, 260 | headers={"Accept-Encoding": "gzip, deflate, br"} 261 | ) 262 | return data 263 | 264 | def _request( 265 | self, 266 | method: str, 267 | url: str, 268 | headers: dict = {}, 269 | body: bytes = None, 270 | origin_url: str = None, 271 | sec_site: str = "cross-site", 272 | sec_mode: str = "cors", 273 | sec_dest: str = "empty" 274 | ): 275 | if isinstance(body, str): 276 | body = body.encode() 277 | 278 | p_url = urlsplit(url) 279 | addr = (p_url.hostname.lower(), p_url.port or 443) 280 | 281 | conn = self._conn_map.get(addr) 282 | if not conn: 283 | if not self._http_proxy: 284 | conn = HTTPSConnection( 285 | *addr, 286 | timeout=self._timeout, 287 | context=self._ssl_context) 288 | else: 289 | conn = HTTPSConnection( 290 | *self._http_proxy[1], 291 | timeout=self._timeout, 292 | context=self._ssl_context) 293 | conn.set_tunnel( 294 | *addr, 295 | headers={"Proxy-Authorization": self._http_proxy[0]}) 296 | self._conn_map[addr] = conn 297 | 298 | conn.putrequest( 299 | method=method, 300 | url=p_url.path + (f"?{p_url.query}" if p_url.query else ""), 301 | skip_host=True, 302 | skip_accept_encoding=True) 303 | 304 | headers = self._agent.format_headers( 305 | url=url, 306 | body=body, 307 | headers=headers, 308 | origin_url=origin_url, 309 | sec_site=sec_site, 310 | sec_mode=sec_mode, 311 | sec_dest=sec_dest) 312 | 313 | for name, value in headers.items(): 314 | conn.putheader(name, value) 315 | conn.endheaders(body) 316 | 317 | resp = conn.getresponse() 318 | data = resp.read() 319 | 320 | if (encoding := resp.headers.get("content-encoding")): 321 | if encoding == "gzip": 322 | data = zlib.decompress(data, 16 + zlib.MAX_WBITS) 
323 | 324 | if resp.status > 403: 325 | raise RequestRejected( 326 | f"Unrecognized status code: {resp.status}: {resp.reason}") 327 | 328 | if resp.headers["content-type"].startswith("application/json"): 329 | data = json.loads(data) 330 | if "c" in data: 331 | self._spec = data["c"] 332 | 333 | return data -------------------------------------------------------------------------------- /hcaptcha/constants.py: -------------------------------------------------------------------------------- 1 | FRAME_SIZE = (400, 600) 2 | TILES_PER_PAGE = 9 3 | TILES_PER_ROW = 3 4 | TILE_IMAGE_SIZE = (123, 123) 5 | TILE_IMAGE_START_POS = (11, 130) 6 | TILE_IMAGE_PADDING = (5, 6) 7 | VERIFY_BTN_POS = (314, 559) -------------------------------------------------------------------------------- /hcaptcha/curves/__init__.py: -------------------------------------------------------------------------------- 1 | # hastily re-purposed from https://github.com/patrikoss/pyclick 2 | from .humancurve import gen_mouse_move -------------------------------------------------------------------------------- /hcaptcha/curves/_beziercurve.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | class BezierCurve(): 4 | @staticmethod 5 | def binomial(n, k): 6 | """Returns the binomial coefficient "n choose k" """ 7 | return math.factorial(n) / float(math.factorial(k) * math.factorial(n - k)) 8 | 9 | @staticmethod 10 | def bernsteinPolynomialPoint(x, i, n): 11 | """Calculate the i-th component of a bernstein polynomial of degree n""" 12 | return BezierCurve.binomial(n, i) * (x ** i) * ((1 - x) ** (n - i)) 13 | 14 | @staticmethod 15 | def bernsteinPolynomial(points): 16 | """ 17 | Given list of control points, returns a function, which given a point [0,1] returns 18 | a point in the bezier curve described by these points 19 | """ 20 | def bern(t): 21 | n = len(points) - 1 22 | x = y = 0 23 | for i, point in enumerate(points): 24 | bern = 
def isNumeric(val):
    """Tell whether ``val`` is one of the accepted numeric scalar types.

    Accepted are the built-in ``int`` and ``float`` (and their subclasses)
    plus NumPy's 32- and 64-bit integer and floating point scalars.
    """
    accepted_types = (
        float,
        int,
        np.int32,
        np.int64,
        np.float32,
        np.float64,
    )
    return isinstance(val, accepted_types)
def generateInternalKnots(self,
        leftBoundary, rightBoundary,
        downBoundary, upBoundary,
        knotsCount):
    """
    Generates the internal knots used during generation of bezier curvePoints
    or any interpolation function. The points are taken at random from
    a surface delimited by given boundaries.
    Exactly knotsCount internal knots are randomly generated.

    Boundaries are truncated to integers. The upper boundary of each axis is
    exclusive, except for a degenerate axis (lower == upper), where the
    single boundary value is used for every knot.

    Raises ValueError when a boundary is not numeric, knotsCount is not a
    non-negative integer, or a lower boundary exceeds its upper boundary.
    """
    if not (isNumeric(leftBoundary) and isNumeric(rightBoundary) and
            isNumeric(downBoundary) and isNumeric(upBoundary)):
        raise ValueError("Boundaries must be numeric")
    if not isinstance(knotsCount, int) or knotsCount < 0:
        raise ValueError("knotsCount must be non-negative integer")
    if leftBoundary > rightBoundary:
        raise ValueError("leftBoundary must be less than or equal to rightBoundary")
    if downBoundary > upBoundary:
        raise ValueError("downBoundary must be less than or equal to upBoundary")

    # range() only takes integers, while the validation above also lets
    # floats through - normalise before building the populations.
    left, right = int(leftBoundary), int(rightBoundary)
    down, up = int(downBoundary), int(upBoundary)

    # The validation explicitly allows lower == upper, but an empty range
    # makes np.random.choice raise; widen a degenerate axis to one value.
    knotsX = np.random.choice(range(left, max(right, left + 1)), size=knotsCount)
    knotsY = np.random.choice(range(down, max(up, down + 1)), size=knotsCount)
    return list(zip(knotsX, knotsY))
def tweenPoints(self, points, tween, targetPoints):
    """
    Picks targetPoints entries out of points, spaced according to the
    tweening function tween (a callable mapping a float in 0..1 to a float
    in 0..1). Because the spacing decides how far the cursor travels
    between consecutive samples, this controls the velocity of the mouse
    movement.

    Raises ValueError when points is not a valid list of points or
    targetPoints is not an integer of at least 2.
    """
    if not isListOfPoints(points):
        raise ValueError("points must be valid list of points")
    if not isinstance(targetPoints, int) or targetPoints < 2:
        raise ValueError("targetPoints must be an integer greater or equal to 2")

    last_index = len(points) - 1
    last_step = targetPoints - 1
    return [
        points[int(tween(float(step) / last_step) * last_index)]
        for step in range(targetPoints)
    ]
def get_proof(req):
    """
    Build an "hsl" proof string for the given request token.

    ``req`` is decoded with ``parse_jsw``. The decoded value must expose
    ``payload["s"]`` (used as the number of leading hash bits to inspect)
    and ``payload["d"]`` (a string prefixed to every candidate).

    Nonces over a 64-character alphabet are tried shortest first
    (lengths 1 through 24). The first nonce whose SHA-1 digest of
    ``d + "::" + nonce`` passes the bit check is used.

    :param req: encoded request, passed straight to ``parse_jsw``.
    :return: the colon-joined fields ``"1"``, ``s``, the current local
        time as ``YYYYMMDDHHMMSS``, ``d``, an empty field and the nonce.
    """
    alphabet = "0123456789/:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    req = parse_jsw(req)
    payload = req["payload"]

    def advance(counter):
        # Odometer-style increment, least significant digit last.
        # Returns False once every digit has rolled over back to zero.
        for pos in reversed(range(len(counter))):
            if counter[pos] < len(alphabet) - 1:
                counter[pos] += 1
                return True
            counter[pos] = 0
        return False

    def encode(counter):
        # Map each digit of the counter to its alphabet character.
        return "".join(alphabet[digit] for digit in counter)

    def is_solution(difficulty, candidate):
        digest = hashlib.sha1(candidate.encode()).digest()
        # Expand the digest into single bits, lowest bit of each byte first.
        bits = [digest[k // 8] >> k % 8 & 1 for k in range(8 * len(digest))]
        prefix = bits[:difficulty]
        first_one = prefix.index(1) if 1 in prefix else -1
        # Accept when the inspected prefix holds no 1 bit at all, or when
        # it starts with 0 and its first 1 bit sits at index
        # difficulty - 1 or later.
        return (prefix[0] == 0 and first_one >= difficulty - 1) or first_one == -1

    def search():
        # The counter is advanced before it is tested, so the all-zero
        # counter of each length is never tried as a nonce.
        for length in range(25):
            counter = [0] * length
            while advance(counter):
                nonce = encode(counter)
                candidate = payload["d"] + "::" + nonce
                if is_solution(payload["s"], candidate):
                    return nonce

    result = search()
    fields = [
        "1",
        str(payload["s"]),
        datetime.now().strftime("%Y%m%d%H%M%S"),
        payload["d"],
        "",
        result,
    ]
    return ":".join(fields)
30 | 31 |