├── tests ├── __init__.py ├── test_browser.py └── test_solver.py ├── captcha_solver ├── types.py ├── backend │ ├── __init__.py │ ├── rucaptcha.py │ ├── twocaptcha.py │ ├── base.py │ ├── browser.py │ └── antigate.py ├── __init__.py ├── error.py ├── network.py └── solver.py ├── .bumpversion.cfg ├── .gitignore ├── .github └── workflows │ ├── mypy.yml │ ├── check.yml │ └── test.yml ├── requirements_dev.txt ├── tox.ini ├── LICENSE ├── Makefile ├── .flake8 ├── README.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /captcha_solver/types.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /captcha_solver/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.5 3 | files = captcha_solver/__init__.py pyproject.toml 4 | commit = True 5 | tag = True 6 | -------------------------------------------------------------------------------- /captcha_solver/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | from captcha_solver.solver import CaptchaSolver 3 | from captcha_solver.error import * # noqa 4 | 5 | __version__ = '0.1.5' 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.swp 4 | *.swo 5 | *.orig 6 | 7 | /web/settings_local.py 8 | pip-log.txt 9 | /.env 10 | /var/ 11 | /dump/ 12 | /src/ 13 | /ioweb 14 
class RucaptchaBackend(TwocaptchaBackend):
    """Backend for the rucaptcha.com service.

    Everything is inherited from TwocaptchaBackend; only the default
    service URL differs.
    """

    def __init__(
        self, api_key: str, service_url: str = "https://rucaptcha.com"
    ) -> None:
        """Forward the API key and the service URL to the parent backend."""
        super().__init__(service_url=service_url, api_key=api_key)
__all__ = (
    "CaptchaSolverError",
    "CaptchaServiceError",
    "SolutionNotReady",
    "ServiceTooBusy",
    "BalanceTooLow",
    "SolutionTimeoutError",
    "InvalidServiceBackend",
)


class CaptchaSolverError(Exception):
    """Root of the exception hierarchy defined by this package."""


class CaptchaServiceError(CaptchaSolverError):
    """Base class for failures reported by a captcha solving service."""


class SolutionNotReady(CaptchaServiceError):
    """The service has not produced a solution for the captcha yet."""


class SolutionTimeoutError(SolutionNotReady):
    """No solution was obtained within the allowed time."""


class ServiceTooBusy(CaptchaServiceError):
    """The service has no free slot to accept the captcha."""


class BalanceTooLow(CaptchaServiceError):
    """The account balance does not allow submitting the captcha."""


class InvalidServiceBackend(CaptchaSolverError):
    """The requested backend alias or class is not a usable backend."""
class TwocaptchaBackend(AntigateBackend):
    """Backend for the 2captcha.com service.

    Request building and response parsing are inherited from
    AntigateBackend. This class only changes the default service URL and the
    ``soft_id`` value sent together with a submitted captcha.
    """

    def __init__(
        self,
        api_key: str,
        service_url: str = "https://2captcha.com",
    ) -> None:
        """Create the backend.

        Parameters
        ----------
        api_key : str
            API key sent with every request.
        service_url : str
            Base URL of the service. The default used to be the antigate.com
            URL copied from the parent class, which sent 2captcha keys to
            the wrong service unless the caller overrode it.
        """
        super().__init__(api_key=api_key, service_url=service_url)

    def get_submit_captcha_request_data(
        self, data: bytes, **kwargs: Any
    ) -> NetworkRequest:
        """Build the submit request and tag it with this module's SOFTWARE_ID.

        The ``soft_id`` field is overwritten after the parent has merged
        ``kwargs`` into the POST data, so a caller-supplied ``soft_id`` does
        not survive.
        """
        res = super().get_submit_captcha_request_data(data, **kwargs)
        # The parent always builds POST data; the assert narrows the Optional
        # type declared on NetworkRequest.
        assert res["post_data"] is not None
        res["post_data"]["soft_id"] = SOFTWARE_ID
        return res
class ServiceBackend:
    """Interface implemented by every captcha solving backend.

    All four methods raise NotImplementedError here and are meant to be
    overridden. The class does not derive from ``abc.ABC``, so the
    ``abstractmethod`` markers are not enforced when it is instantiated.
    """

    @abstractmethod
    def get_submit_captcha_request_data(
        self, data: bytes, **kwargs: Any
    ) -> NetworkRequest:
        """Describe the request that submits the captcha image ``data``."""
        raise NotImplementedError

    @abstractmethod
    def parse_submit_captcha_response(self, res: NetworkResponse) -> str:
        """Extract the captcha identifier from the submit response ``res``."""
        raise NotImplementedError

    @abstractmethod
    def get_check_solution_request_data(self, captcha_id: str) -> NetworkRequest:
        """Describe the request that asks for the solution of ``captcha_id``."""
        raise NotImplementedError

    @abstractmethod
    def parse_check_solution_response(self, res: NetworkResponse) -> str:
        """Extract the solution text from the check response ``res``."""
        raise NotImplementedError
class BrowserTestCase(TestCase):
    """Exercise the "browser" backend with the browser and the prompt mocked."""

    def setUp(self):
        self.solver = CaptchaSolver("browser")
        # Neither a real browser window nor a real stdin prompt may appear
        # during the test run, so both entry points are patched.
        self.wb_patcher = patch("webbrowser.open")
        self.mock_wb_open = self.wb_patcher.start()
        self.raw_input_patcher = patch("captcha_solver.backend.browser.input")
        self.mock_raw_input = self.raw_input_patcher.start()

    def tearDown(self):
        for patcher in (self.wb_patcher, self.raw_input_patcher):
            patcher.stop()

    def test_captcha_decoded(self):
        self.mock_wb_open.return_value = None
        self.mock_raw_input.return_value = "decoded_captcha"

        solution = self.solver.solve_captcha(b"image_data")
        self.assertEqual(solution, "decoded_captcha")
def request(
    url: str, data: None | Mapping[str, str | float], timeout: float
) -> NetworkResponse:
    """Fetch ``url`` and return the status code, the raw body and the URL.

    Parameters
    ----------
    url : str
        Address to open.
    data : mapping or None
        When non-empty it is URL-encoded and sent as the request body;
        ``None`` or an empty mapping sends no body.
    timeout : float
        Timeout handed to ``urlopen``.

    Returns
    -------
    NetworkResponse
        Dict with the keys ``code``, ``body`` and ``url``.

    An ``HTTPError`` is not propagated: its status code and body are returned
    like a normal response. Any other exception raised by ``urlopen`` is
    left to the caller.
    """
    if data:
        payload = urlencode(data).encode("ascii")
    else:
        payload = None
    req = Request(url, payload)
    try:
        with urlopen(req, timeout=timeout) as resp:  # nosec B310
            body = resp.read()
            code = resp.getcode()
    except HTTPError as ex:
        # Error statuses still carry a body which backends want to inspect.
        code = ex.code
        body = ex.fp.read()
    return {"code": code, "body": body, "url": url}
class BrowserBackend(ServiceBackend):
    """Backend that lets a human solve the captcha.

    The image is stored in a temporary file whose ``file://`` URL plays the
    role of the service request. The file is then opened in the default web
    browser and the solution is read from standard input.
    """

    # Scheme prefix used to convert between temporary file paths and URLs.
    _URL_PREFIX = "file://"

    def setup(self, **_kwargs: Any) -> None:
        """Accept and ignore any configuration; this backend needs none."""

    @classmethod
    def _url_to_path(cls, url: str) -> str:
        """Return the file path encoded in a URL built by this backend.

        Only the leading scheme prefix is removed, so a path that itself
        contains the text ``file://`` is not mangled.
        """
        if url.startswith(cls._URL_PREFIX):
            return url[len(cls._URL_PREFIX) :]
        return url

    def get_submit_captcha_request_data(
        self, data: bytes, **kwargs: Any
    ) -> NetworkRequest:
        """Write the image to a temporary file and return its ``file://`` URL.

        ``kwargs`` are accepted for interface compatibility and ignored.
        """
        fd, path = tempfile.mkstemp()
        # Wrap the descriptor returned by mkstemp instead of opening the path
        # a second time: the descriptor is then closed even if writing fails.
        with os.fdopen(fd, "wb") as out:
            out.write(data)
        return {"url": self._URL_PREFIX + path, "post_data": None}

    def parse_submit_captcha_response(self, res: NetworkResponse) -> str:
        """Return the temporary file path, which serves as the captcha id."""
        return self._url_to_path(res["url"])

    def get_check_solution_request_data(self, captcha_id: str) -> NetworkRequest:
        """Turn the captcha id (a file path) back into its ``file://`` URL."""
        return {"url": self._URL_PREFIX + captcha_id, "post_data": None}

    def parse_check_solution_response(self, res: NetworkResponse) -> str:
        """Show the image in the browser and ask the user for the solution.

        The temporary file is deleted before the prompt is displayed.
        """
        webbrowser.open(url=res["url"])
        # Wait some time, skip some debug messages
        # which browser could dump to console
        time.sleep(0.5)
        os.unlink(self._url_to_path(res["url"]))
        return input("Enter solution: ")
3 | # E121 continuation line under-indented for hanging indent 4 | # E125 continuation line with same indent as next logical line 5 | # E203 whitespace before ':' 6 | # E261 at least two spaces before inline comment 7 | # E265 block comment should start with '# ' 8 | # F401 'pprint.pprint' imported but unused, CHECKED BY PYLINT 9 | # F841 local variable 'suffix' is assigned to but never used, CHECKED BY PYLINT 10 | # W503 line break before binary operator 11 | # N818 exception name 'ElementNotFound' should be named with an Error suffix 12 | # F403: used; unable to detect undefined names # disabled because pylint "wildcard-import" does the same 13 | # --- flake8-commas 14 | # C812 missing trailing comma 15 | # C813 missing trailing comma in Python 3 16 | # --- flake8-docstrings 17 | # D100 Missing docstring in public module 18 | # D101 Missing docstring in public class 19 | # D102 Missing docstring in public method 20 | # D103 Missing docstring in public function 21 | # D104 Missing docstring in public package 22 | # D105 Missing docstring in magic method 23 | # D107 Missing docstring in __init__ 24 | # D106 Missing docstring in public nested class 25 | # --- flake8-string-format 26 | # P101 format string does contain unindexed parameters 27 | # --- flake8-pie 28 | # PIE798 no-unnecessary-class: Consider using a module for namespacing instead 29 | # PIE786 Use precise exception handlers 30 | # PEA001 typing.Match is deprecated, use re.Match instead. # is not possible in py<3.9 31 | # E203 Colons should not have any space before them. 
# does not work with Black formatting of "foo[(1 + 1) : ]" 32 | ignore=F401,C812,C813,D100,D101,D102,D103,D104,D106,D107,D105,P101,PIE798,PIE786,W503,N818,PEA001,E203,F403 33 | max-line-length=88 34 | inline-quotes = double 35 | max-complexity=10 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Test Status](https://github.com/ivan-0224/python-captcha/actions/workflows/test.yml/badge.svg)](https://github.com/lorien/captcha_solver/actions/workflows/test.yml) 3 | [![Code Quality](https://github.com/ivan-0224/python-captcha/actions/workflows/check.yml/badge.svg)](https://github.com/lorien/captcha_solver/actions/workflows/check.yml) 4 | [![Type Check](https://github.com/ivan-0224/python-captcha/actions/workflows/mypy.yml/badge.svg)](https://github.com/lorien/captcha_solver/actions/workflows/mypy.yml) 5 | [![Test Coverage Status](https://coveralls.io/repos/github/lorien/captcha_solver/badge.svg)](https://coveralls.io/github/lorien/captcha_solver) 6 | [![Documentation Status](https://readthedocs.org/projects/captcha_solver/badge/?version=latest)](https://captcha_solver.readthedocs.org) 7 | 8 | ## Installation 9 | 10 | Run: `pip install -U python-captcha` 11 | 12 | ## Twocaptcha Backend Example 13 | 14 | Service website is https://2captcha.com?from=3019071 15 | 16 | ```python 17 | from captcha_solver import CaptchaSolver 18 | 19 | solver = CaptchaSolver('2captcha', api_key='2captcha.com API KEY HERE') 20 | raw_data = open('captcha.png', 'rb').read() 21 | print(solver.solve_captcha(raw_data)) 22 | ``` 23 | 24 | ## Rucaptcha Backend Example 25 | 26 | Service website is https://rucaptcha.com?from=3019071 27 | 28 | ```python 29 | from captcha_solver import CaptchaSolver 30 | 31 | solver = CaptchaSolver('rucaptcha', api_key='RUCAPTCHA_KEY') 32 | raw_data = open('captcha.png', 'rb').read() 33 | print(solver.solve_captcha(raw_data)) 34 | ``` 35
| 36 | ## Browser Backend Example 37 | ```python 38 | from captcha_solver import CaptchaSolver 39 | 40 | solver = CaptchaSolver('browser') 41 | raw_data = open('captcha.png', 'rb').read() 42 | print(solver.solve_captcha(raw_data)) 43 | ``` 44 | 45 | ## Antigate Backend Example 46 | 47 | Service website is http://getcaptchasolution.com/ijykrofoxz 48 | 49 | ```python 50 | from captcha_solver import CaptchaSolver 51 | 52 | solver = CaptchaSolver('antigate', api_key='ANTIGATE_KEY') 53 | raw_data = open('captcha.png', 'rb').read() 54 | print(solver.solve_captcha(raw_data)) 55 | ``` 56 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "captcha_solver" 3 | version = "0.1.5" 4 | 5 | description = "Universal API to access captcha solving services." 6 | readme = "README.md" 7 | requires-python = ">=3.7" 8 | license = {"file" = "LICENSE"} 9 | keywords = [] 10 | authors = [ 11 | {name = "Gregory Petukhov", email = "lorien@lorien.name"} 12 | ] 13 | # https://pypi.org/pypi?%3Aaction=list_classifiers 14 | classifiers = [ 15 | "Programming Language :: Python", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.7", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "License :: OSI Approved :: MIT License", 23 | "Development Status :: 4 - Beta", 24 | "Intended Audience :: Developers", 25 | "Operating System :: OS Independent", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | "Topic :: Internet :: WWW/HTTP", 28 | "Typing :: Typed", 29 | ] 30 | dependencies = [] 31 | 32 | [project.urls] 33 | homepage = "http://github.com/lorien/captcha_solver" 34 | 35 | [build-system] 36 | requires = ["setuptools"] 37 | build-backend = 
"setuptools.build_meta" 38 | 39 | [tool.setuptools] 40 | packages = ["captcha_solver"] 41 | 42 | [tool.setuptools.package-data] 43 | "*" = ["py.typed"] 44 | 45 | [tool.isort] 46 | profile = "black" 47 | line_length = 88 48 | # skip_gitignore = true # throws errors in stderr when ".git" dir does not exist 49 | 50 | [tool.bandit] 51 | # B101 assert_used 52 | # B410 Using HtmlElement to parse untrusted XML data 53 | skips = ["B101", "B410"] 54 | 55 | #[[tool.mypy.overrides]] 56 | #module = "procstat" 57 | #ignore_missing_imports = true 58 | 59 | [tool.pylint.main] 60 | jobs=4 61 | extension-pkg-whitelist="lxml" 62 | disable="missing-docstring,broad-except,too-few-public-methods,consider-using-f-string,fixme" 63 | variable-rgx="[a-z_][a-z0-9_]{1,30}$" 64 | attr-rgx="[a-z_][a-z0-9_]{1,30}$" 65 | argument-rgx="[a-z_][a-z0-9_]{1,30}$" 66 | max-line-length=88 67 | max-args=9 68 | load-plugins=[ 69 | "pylint.extensions.check_elif", 70 | "pylint.extensions.comparetozero", 71 | "pylint.extensions.comparison_placement", 72 | "pylint.extensions.consider_ternary_expression", 73 | "pylint.extensions.docstyle", 74 | "pylint.extensions.emptystring", 75 | "pylint.extensions.for_any_all", 76 | "pylint.extensions.overlapping_exceptions", 77 | "pylint.extensions.redefined_loop_name", 78 | "pylint.extensions.redefined_variable_type", 79 | "pylint.extensions.set_membership", 80 | "pylint.extensions.typing", 81 | ] 82 | 83 | 84 | [tool.pytest.ini_options] 85 | testpaths = ["tests"] 86 | -------------------------------------------------------------------------------- /captcha_solver/backend/antigate.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from base64 import b64encode 4 | from typing import Any 5 | from urllib.parse import urlencode, urljoin 6 | 7 | from ..error import BalanceTooLow, CaptchaServiceError, ServiceTooBusy, SolutionNotReady 8 | from ..network import NetworkRequest, NetworkResponse 9 | 
class AntigateBackend(ServiceBackend):
    """Backend talking to a service through ``in.php`` and ``res.php``."""

    def __init__(
        self, api_key: str, service_url: str = "http://antigate.com"
    ) -> None:
        """Remember the API key and the base URL of the service."""
        super().__init__()
        self.api_key: None | str = api_key
        self.service_url: None | str = service_url

    def get_submit_captcha_request_data(
        self, data: bytes, **kwargs: Any
    ) -> NetworkRequest:
        """Build the POST request that uploads the captcha image.

        The image is sent base64-encoded. Extra keyword arguments are merged
        into the POST fields and may override the default ones.
        """
        assert self.api_key is not None
        post: dict[str, str | float] = {
            "key": self.api_key,
            "method": "base64",
            "body": b64encode(data).decode("ascii"),
            "soft_id": SOFTWARE_ID,
        }
        for field, value in kwargs.items():
            post[field] = value
        assert self.service_url is not None
        return {"url": urljoin(self.service_url, "in.php"), "post_data": post}

    def parse_submit_captcha_response(self, res: NetworkResponse) -> str:
        """Return the captcha id found in an ``OK|<id>`` response body.

        Raises
        ------
        ServiceTooBusy
            The body is ``ERROR_NO_SLOT_AVAILABLE``.
        BalanceTooLow
            The body is ``ERROR_ZERO_BALANCE``.
        CaptchaServiceError
            Any other body, or an HTTP status other than 200.
        """
        if res["code"] != 200:
            raise CaptchaServiceError("Returned HTTP code: %d" % res["code"])
        body = res["body"]
        if body.startswith(b"OK|"):
            return body.partition(b"|")[2].decode("ascii")
        if body == b"ERROR_NO_SLOT_AVAILABLE":
            raise ServiceTooBusy("Service too busy")
        if body == b"ERROR_ZERO_BALANCE":
            raise BalanceTooLow("Balance too low")
        raise CaptchaServiceError(body)

    def get_check_solution_request_data(self, captcha_id: str) -> NetworkRequest:
        """Build the GET request that asks for the solution of ``captcha_id``."""
        assert self.api_key is not None
        assert self.service_url is not None
        query = urlencode({"key": self.api_key, "action": "get", "id": captcha_id})
        return {
            "url": urljoin(self.service_url, "res.php?%s" % query),
            "post_data": None,
        }

    def parse_check_solution_response(self, res: NetworkResponse) -> str:
        """Return the solution found in an ``OK|<solution>`` response body.

        Raises
        ------
        SolutionNotReady
            The body is ``CAPCHA_NOT_READY``.
        CaptchaServiceError
            Any other body, or an HTTP status other than 200.
        """
        if res["code"] != 200:
            raise CaptchaServiceError("Returned HTTP code: %d" % res["code"])
        body = res["body"]
        if body.startswith(b"OK|"):
            return body.partition(b"|")[2].decode("utf8")
        if body == b"CAPCHA_NOT_READY":
            raise SolutionNotReady("Solution is not ready")
        raise CaptchaServiceError(body)
self.assertEqual(self.solver.solve_captcha(b"image_data"), "decoded_captcha") 62 | 63 | def test_antigate_no_slot_available(self): 64 | self.server.add_response(Response(data=b"ERROR_NO_SLOT_AVAILABLE"), count=-1) 65 | with self.assertRaises(error.SolutionTimeoutError): 66 | self.solver.solve_captcha(b"image_data", **TESTING_TIME_PARAMS) 67 | 68 | def test_antigate_zero_balance(self): 69 | self.server.add_response(Response(data=b"ERROR_ZERO_BALANCE")) 70 | self.assertRaises(error.BalanceTooLow, self.solver.solve_captcha, b"image_data") 71 | 72 | def test_antigate_unknown_error(self): 73 | self.server.add_response(Response(data=b"UNKNOWN_ERROR")) 74 | self.assertRaises( 75 | error.CaptchaServiceError, self.solver.solve_captcha, b"image_data" 76 | ) 77 | 78 | def test_antigate_unknown_code(self): 79 | self.server.add_response(Response(status=404)) 80 | self.assertRaises( 81 | error.CaptchaServiceError, self.solver.solve_captcha, b"image_data" 82 | ) 83 | 84 | def test_solution_timeout_error(self): 85 | self.server.add_response(Response(data=b"OK|captcha_id")) 86 | self.server.add_response(Response(data=b"CAPCHA_NOT_READY")) 87 | with self.assertRaises(error.SolutionTimeoutError): 88 | self.solver.solve_captcha(b"image_data", **TESTING_TIME_PARAMS) 89 | 90 | def test_solution_unknown_error(self): 91 | self.server.add_response(Response(data=b"OK|captcha_id")) 92 | self.server.add_response(Response(data=b"UNKONWN_ERROR")) 93 | with self.assertRaises(error.CaptchaServiceError): 94 | self.solver.solve_captcha(b"image_data", **TESTING_TIME_PARAMS) 95 | 96 | def test_solution_unknown_code(self): 97 | self.server.add_response(Response(data=b"OK|captcha_id")) 98 | self.server.add_response(Response(data=b"OK|solution", status=500)) 99 | with self.assertRaises(error.CaptchaServiceError): 100 | self.solver.solve_captcha(b"image_data", **TESTING_TIME_PARAMS) 101 | 102 | def test_network_error_while_sending_captcha(self): 103 | self.server.add_response(Response(data=b"that would 
be timeout", sleep=0.5)) 104 | self.server.add_response(Response(data=b"OK|captcha_id")) 105 | self.server.add_response(Response(data=b"OK|solution")) 106 | 107 | solver = self.create_solver() 108 | solver.setup_network_config(timeout=0.4) 109 | solver.solve_captcha( 110 | b"image_data", 111 | submiting_time=2, 112 | submiting_delay=0, 113 | recognition_time=0, 114 | recognition_delay=0, 115 | ) 116 | 117 | def test_network_error_while_receiving_solution(self): 118 | class Callback: 119 | def __init__(self): 120 | self.step = 0 121 | 122 | def __call__(self): 123 | self.step += 1 124 | if self.step == 1: 125 | return { 126 | "type": "response", 127 | "data": b"OK|captcha_id", 128 | } 129 | if self.step in {2, 3, 4}: 130 | time.sleep(0.2) 131 | return { 132 | "type": "response", 133 | "data": b"that will be timeout", 134 | } 135 | return { 136 | "type": "response", 137 | "data": b"OK|solution", 138 | } 139 | 140 | solver = self.create_solver() 141 | solver.setup_network_config(timeout=0.1) 142 | self.server.add_response(Response(callback=Callback()), count=-1) 143 | solution = solver.solve_captcha( 144 | b"image_data", 145 | submiting_time=0, 146 | submiting_delay=0, 147 | recognition_time=1, 148 | recognition_delay=0.09, 149 | ) 150 | assert solution == "solution" 151 | -------------------------------------------------------------------------------- /captcha_solver/solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import socket 5 | import time 6 | from copy import copy 7 | from pprint import pprint # pylint: disable=unused-import 8 | from typing import Any 9 | from urllib.error import URLError 10 | 11 | from typing_extensions import TypedDict 12 | 13 | from .backend.antigate import AntigateBackend 14 | from .backend.base import ServiceBackend 15 | from .backend.browser import BrowserBackend 16 | from .backend.rucaptcha import RucaptchaBackend 17 | from 
.backend.twocaptcha import TwocaptchaBackend 18 | from .error import ( 19 | InvalidServiceBackend, 20 | ServiceTooBusy, 21 | SolutionNotReady, 22 | SolutionTimeoutError, 23 | ) 24 | from .network import request 25 | 26 | LOGGER = logging.getLogger("captcha_solver") 27 | BACKEND_ALIAS: dict[str, type[ServiceBackend]] = { 28 | "2captcha": TwocaptchaBackend, 29 | "rucaptcha": RucaptchaBackend, 30 | "antigate": AntigateBackend, 31 | "browser": BrowserBackend, 32 | } 33 | 34 | 35 | class NetworkConfig(TypedDict): 36 | timeout: float 37 | 38 | 39 | DEFAULT_NETWORK_CONFIG: NetworkConfig = { 40 | "timeout": 5, 41 | } 42 | 43 | 44 | class InvalidBackend(Exception): 45 | pass 46 | 47 | 48 | class CaptchaSolver: 49 | """This class implements API to communicate with remote captcha solving service.""" 50 | 51 | def __init__(self, backend: str | type[ServiceBackend], **kwargs: Any) -> None: 52 | """Create CaptchaSolver instance. 53 | 54 | Parameters 55 | ---------- 56 | backend : string | ServiceBackend subclass 57 | Alias name of one of standard backends or class inherited from SolverBackend 58 | """ 59 | backend_cls = self.get_backend_class(backend) 60 | self.backend = backend_cls(**kwargs) 61 | self.network_config: NetworkConfig = copy(DEFAULT_NETWORK_CONFIG) 62 | 63 | def setup_network_config(self, timeout: None | int = None) -> None: 64 | if timeout is not None: 65 | self.network_config["timeout"] = timeout 66 | 67 | def get_backend_class( 68 | self, alias: str | type[ServiceBackend] 69 | ) -> type[ServiceBackend]: 70 | if isinstance(alias, str): 71 | return BACKEND_ALIAS[alias] 72 | if issubclass(alias, ServiceBackend): 73 | return alias 74 | raise InvalidServiceBackend("Invalid backend alias: %s" % alias) 75 | 76 | def submit_captcha(self, image_data: bytes, **kwargs: Any) -> str: 77 | LOGGER.debug("Submiting captcha") 78 | data = self.backend.get_submit_captcha_request_data(image_data, **kwargs) 79 | # pprint(data['post_data']) 80 | # print('URL: %s' % data['url']) 81 | 
response = request( 82 | data["url"], data["post_data"], timeout=self.network_config["timeout"] 83 | ) 84 | return self.backend.parse_submit_captcha_response(response) 85 | 86 | def check_solution(self, captcha_id: str) -> str: 87 | """Check if service has solved requested captcha. 88 | 89 | Raises 90 | ------ 91 | - SolutionNotReady 92 | - ServiceTooBusy 93 | """ 94 | data = self.backend.get_check_solution_request_data(captcha_id) 95 | response = request( 96 | data["url"], 97 | data["post_data"], 98 | timeout=self.network_config["timeout"], 99 | ) 100 | return self.backend.parse_check_solution_response(response) 101 | 102 | def submit_captcha_with_retry( 103 | self, submiting_time: float, submiting_delay: float, data: bytes, **kwargs: Any 104 | ) -> str: 105 | fail: None | Exception = None 106 | start_time = time.time() 107 | while True: 108 | # pylint: disable=overlapping-except 109 | try: 110 | return self.submit_captcha(image_data=data, **kwargs) 111 | except (ServiceTooBusy, URLError, socket.error, TimeoutError) as ex: 112 | fail = ex 113 | if ((time.time() + submiting_delay) - start_time) > submiting_time: 114 | break 115 | time.sleep(submiting_delay) 116 | if isinstance(fail, ServiceTooBusy): 117 | raise SolutionTimeoutError("Service has no available slots") from fail 118 | raise SolutionTimeoutError( 119 | "Could not access the service, reason: {}".format(fail) 120 | ) from fail 121 | 122 | def check_solution_with_retry( 123 | self, recognition_time: float, recognition_delay: float, captcha_id: str 124 | ) -> str: 125 | fail: None | Exception = None 126 | start_time = time.time() 127 | while True: 128 | # pylint: disable=overlapping-except 129 | try: 130 | return self.check_solution(captcha_id) 131 | except ( 132 | SolutionNotReady, 133 | socket.error, 134 | TimeoutError, 135 | ServiceTooBusy, 136 | URLError, 137 | ) as ex: 138 | fail = ex 139 | if ((time.time() + recognition_delay) - start_time) > recognition_time: 140 | break 141 | 
time.sleep(recognition_delay) 142 | if isinstance(fail, (ServiceTooBusy, SolutionNotReady)): 143 | raise SolutionTimeoutError( 144 | "Captcha is not ready after" " %s seconds" % recognition_time 145 | ) 146 | raise SolutionTimeoutError("Service is not available." " Error: %s" % fail) 147 | 148 | def solve_captcha( 149 | self, 150 | data: bytes, 151 | submiting_time: float = 30, 152 | submiting_delay: float = 3, 153 | recognition_time: float = 120, 154 | recognition_delay: float = 5, 155 | **kwargs: Any, 156 | ) -> str: 157 | assert submiting_time >= 0 158 | assert submiting_delay >= 0 159 | assert recognition_time >= 0 160 | assert recognition_delay >= 0 161 | captcha_id = self.submit_captcha_with_retry( 162 | submiting_time, submiting_delay, data, **kwargs 163 | ) 164 | return self.check_solution_with_retry( 165 | recognition_time, recognition_delay, captcha_id 166 | ) 167 | --------------------------------------------------------------------------------