├── .deepsource.toml ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── publish-package.yml │ ├── ruff.yml │ └── test-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── recaptchav2 │ ├── async_solve_audio.py │ ├── async_solve_image.py │ ├── solve_with_sitekey.py │ ├── sync_solve_audio.py │ └── sync_solve_image.py └── recaptchav3 │ ├── async_solve.py │ └── sync_solve.py ├── playwright_recaptcha ├── __init__.py ├── errors.py ├── recaptchav2 │ ├── __init__.py │ ├── async_solver.py │ ├── base_solver.py │ ├── recaptcha_box.py │ ├── sync_solver.py │ └── translations.py └── recaptchav3 │ ├── __init__.py │ ├── async_solver.py │ ├── base_solver.py │ └── sync_solver.py ├── pytest.ini ├── requirements.txt ├── setup.py └── tests ├── test_async_recaptchav2.py ├── test_async_recaptchav3.py ├── test_sync_recaptchav2.py └── test_sync_recaptchav3.py /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | [[analyzers]] 4 | name = "python" 5 | 6 | [analyzers.meta] 7 | runtime_version = "3.x.x" 8 | 9 | [[transformers]] 10 | name = "black" -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: "pip" 5 | directory: "/" 6 | schedule: 7 | interval: "daily" 8 | time: "03:00" 9 | timezone: "America/Chicago" 10 | -------------------------------------------------------------------------------- /.github/workflows/publish-package.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | publish-package: 12 | name: Publish Package to PyPI 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.x 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install -U pip 25 | pip install build 26 | 27 | - name: Build package 28 | run: python -m build 29 | 30 | - name: Publish package to PyPI 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | user: __token__ 34 | password: ${{ secrets.PYPI_API_TOKEN }} 35 | -------------------------------------------------------------------------------- /.github/workflows/ruff.yml: -------------------------------------------------------------------------------- 1 | name: Analyze With Ruff 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | permissions: 10 | actions: read 11 | contents: read 12 | security-events: write 13 | 14 | jobs: 15 | analyze-with-ruff: 16 | name: Analyze With Ruff 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.x 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install -U pip 28 | pip install ruff 29 | 30 | - name: Run ruff 31 | run: ruff check 32 | --no-cache 33 | --exit-zero 34 | --output-format sarif > ruff-results.sarif 35 | 36 | - name: Upload ruff results to GitHub 37 | uses: github/codeql-action/upload-sarif@v2 38 | with: 39 | sarif_file: ruff-results.sarif 40 | wait-for-processing: true 41 | -------------------------------------------------------------------------------- /.github/workflows/test-package.yml: -------------------------------------------------------------------------------- 1 | name: Test Package 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - playwright_recaptcha/** 9 | pull_request: 10 | paths: 11 | - playwright_recaptcha/** 12 | 13 | jobs: 14 | test-package: 15 | name: Test Package With Python ${{ matrix.python-version }} 16 | runs-on: ubuntu-latest 17 | strategy: 18 | fail-fast: false 19 | max-parallel: 1 20 | matrix: 21 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Install dependencies 31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install -y ffmpeg 34 | python -m pip install -U pip 35 | pip install -r requirements.txt . 36 | playwright install --with-deps firefox 37 | 38 | - name: Test with pytest 39 | run: pytest 40 | env: 41 | CAPSOLVER_API_KEY: ${{ secrets.CAPSOLVER_API_KEY }} 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ruff 2 | .ruff_cache/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | #.idea/ 164 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Xewdy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Python](https://img.shields.io/pypi/pyversions/playwright-recaptcha.svg)](https://www.python.org/downloads/) 2 | [![PyPI](https://img.shields.io/pypi/v/playwright-recaptcha.svg)](https://pypi.org/project/playwright-recaptcha/) 3 | [![Downloads](https://img.shields.io/pypi/dm/playwright-recaptcha.svg)](https://pypi.org/project/playwright-recaptcha/) 4 | [![License](https://img.shields.io/badge/license-MIT-red)](https://github.com/Xewdy444/Playwright-reCAPTCHA/blob/main/LICENSE) 5 | 6 | --- 7 | 8 |
9 | 10 | 11 | 12 |
13 | Capsolver.com is an AI-powered service that specializes in solving various types of captchas automatically. It supports captchas such as reCAPTCHA V2, reCAPTCHA V3, hCaptcha, FunCaptcha, DataDome, AWS Captcha, Geetest, and Cloudflare Captcha / Challenge 5s, Imperva / Incapsula, among others. 14 | For developers, Capsolver offers API integration options detailed in their documentation, facilitating the integration of captcha solving into applications. They also provide browser extensions for Chrome and Firefox, making it easy to use their service directly within a browser. Different pricing packages are available to accommodate varying needs, ensuring flexibility for users. 15 |
16 | 17 | --- 18 | 19 | # Playwright-reCAPTCHA 20 | A Python library for solving reCAPTCHA v2 and v3 with Playwright. 21 | 22 | ## Solving reCAPTCHA v2 23 | reCAPTCHA v2 is solved by using the following methods: 24 | 25 | - Solving the audio challenge by transcribing the audio using the Google speech recognition API and entering the text as the response. 26 | - Solving the image challenge using the [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API for image classification. 27 | 28 | ## Solving reCAPTCHA v3 29 | The solving of reCAPTCHA v3 is done by the browser itself, so this library simply waits for the browser to make a POST request to https://www.google.com/recaptcha/api2/reload or https://www.google.com/recaptcha/enterprise/reload and parses the response to get the `g-recaptcha-response` token. 30 | 31 | --- 32 | 33 | All solvers return the `g-recaptcha-response` token, which is required for form submissions. If you are unsure about the version of reCAPTCHA being used, you can check out [this blog post](https://www.capsolver.com/blog/reCAPTCHA/identify-what-recaptcha-version-is-being-used) for more information. 34 | 35 | ## Installation 36 | pip install playwright-recaptcha 37 | 38 | This library requires FFmpeg to be installed on your system for the transcription of reCAPTCHA v2 audio challenges. 39 | 40 | | OS | Command | 41 | | :-----: | :--------------------: | 42 | | Debian | apt-get install ffmpeg | 43 | | MacOS | brew install ffmpeg | 44 | | Windows | winget install ffmpeg | 45 | 46 | You can also download the latest static build from [here](https://ffmpeg.org/download.html). 47 | 48 | > **Note** 49 | > Make sure to have the ffmpeg and ffprobe binaries in your system's PATH so that pydub can find them. 50 | 51 | ## Supported Languages 52 | - Chinese (zh-CN) 53 | - Dutch (nl) 54 | - English (en) 55 | - French (fr) 56 | - German (de) 57 | - Italian (it) 58 | - Portuguese (pt) 59 | - Russian (ru) 60 | - Spanish (es) 61 | 62 | If you would like to request support for a new language, please open an issue. You can also open a pull request if you would like to contribute. 63 | 64 | ## reCAPTCHA v2 Example 65 | For more reCAPTCHA v2 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav2). 66 | 67 | ```python 68 | from playwright.sync_api import sync_playwright 69 | from playwright_recaptcha import recaptchav2 70 | 71 | with sync_playwright() as playwright: 72 | browser = playwright.firefox.launch() 73 | page = browser.new_page() 74 | page.goto("https://www.google.com/recaptcha/api2/demo") 75 | 76 | with recaptchav2.SyncSolver(page) as solver: 77 | token = solver.solve_recaptcha(wait=True) 78 | print(token) 79 | ``` 80 | 81 | By default, the audio challenge will be solved. If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`. 82 | 83 | ```python 84 | with recaptchav2.SyncSolver(page, capsolver_api_key="your_api_key") as solver: 85 | token = solver.solve_recaptcha(wait=True, image_challenge=True) 86 | print(token) 87 | ``` 88 | 89 | ## reCAPTCHA v3 Example 90 | For more reCAPTCHA v3 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav3). 91 | 92 | ```python 93 | from playwright.sync_api import sync_playwright 94 | from playwright_recaptcha import recaptchav3 95 | 96 | with sync_playwright() as playwright: 97 | browser = playwright.firefox.launch() 98 | page = browser.new_page() 99 | 100 | with recaptchav3.SyncSolver(page) as solver: 101 | page.goto("https://antcpt.com/score_detector/") 102 | token = solver.solve_recaptcha() 103 | print(token) 104 | ``` 105 | 106 | It is best to initialize the solver before navigating to the page with the reCAPTCHA v3 challenge. This is because the solver adds a listener for the POST request to https://www.google.com/recaptcha/api2/reload or https://www.google.com/recaptcha/enterprise/reload and if the request is made before the listener is added, the `g-recaptcha-response` token will not be captured. 107 | 108 | 109 | ## Disclaimer 110 | This library is intended for use in automated testing and development environments only and should not be used for any illegal or malicious purposes. Any use of this library for activities that violate the terms of service of any website or service is strictly prohibited. The contributors of this library will not be held liable for any damages or legal issues that may arise from the use of this library. By using this library, you agree to these terms and take full responsibility for your actions. 111 | -------------------------------------------------------------------------------- /examples/recaptchav2/async_solve_audio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from playwright.async_api import async_playwright 4 | 5 | from playwright_recaptcha import recaptchav2 6 | 7 | 8 | async def main() -> None: 9 | async with async_playwright() as playwright: 10 | browser = await playwright.firefox.launch() 11 | page = await browser.new_page() 12 | await page.goto("https://www.google.com/recaptcha/api2/demo") 13 | 14 | async with recaptchav2.AsyncSolver(page) as solver: 15 | token = await solver.solve_recaptcha(wait=True) 16 | print(token) 17 | 18 | 19 | if __name__ == "__main__": 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /examples/recaptchav2/async_solve_image.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from playwright.async_api import async_playwright 4 | 5 | from playwright_recaptcha import recaptchav2 6 | 7 | 8 | async def main() -> None: 9 | async with async_playwright() as playwright: 10 | browser = await playwright.firefox.launch() 11 | page = await browser.new_page() 12 | await page.goto("https://www.google.com/recaptcha/api2/demo") 13 | 14 | async with recaptchav2.AsyncSolver(page) as solver: 15 | token = await solver.solve_recaptcha(wait=True, image_challenge=True) 16 | print(token) 17 | 18 | 19 | if __name__ == "__main__": 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /examples/recaptchav2/solve_with_sitekey.py: -------------------------------------------------------------------------------- 1 | from playwright.sync_api import sync_playwright 2 | 3 | from playwright_recaptcha import recaptchav2 4 | 5 | RECAPTCHA_HTML = """ 6 | 7 | 8 | 9 | 11 | 12 | 13 |
16 | 17 | 18 | """ 19 | 20 | 21 | def main() -> None: 22 | with sync_playwright() as playwright: 23 | browser = playwright.firefox.launch() 24 | page = browser.new_page() 25 | 26 | # It is important to load a website before setting the reCAPTCHA HTML. 27 | # If you don't, the reCAPTCHA will give you an "Invalid domain for site key" error. 28 | page.goto("https://www.google.com/", wait_until="commit") 29 | 30 | page.set_content( 31 | RECAPTCHA_HTML.format(sitekey="6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-") 32 | ) 33 | 34 | with recaptchav2.SyncSolver(page) as solver: 35 | token = solver.solve_recaptcha(wait=True) 36 | print(token) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /examples/recaptchav2/sync_solve_audio.py: -------------------------------------------------------------------------------- 1 | from playwright.sync_api import sync_playwright 2 | 3 | from playwright_recaptcha import recaptchav2 4 | 5 | 6 | def main() -> None: 7 | with sync_playwright() as playwright: 8 | browser = playwright.firefox.launch() 9 | page = browser.new_page() 10 | page.goto("https://www.google.com/recaptcha/api2/demo") 11 | 12 | with recaptchav2.SyncSolver(page) as solver: 13 | token = solver.solve_recaptcha(wait=True) 14 | print(token) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /examples/recaptchav2/sync_solve_image.py: -------------------------------------------------------------------------------- 1 | from playwright.sync_api import sync_playwright 2 | 3 | from playwright_recaptcha import recaptchav2 4 | 5 | 6 | def main() -> None: 7 | with sync_playwright() as playwright: 8 | browser = playwright.firefox.launch() 9 | page = browser.new_page() 10 | page.goto("https://www.google.com/recaptcha/api2/demo") 11 | 12 | with recaptchav2.SyncSolver(page) as solver: 13 | token = solver.solve_recaptcha(wait=True, image_challenge=True) 14 | print(token) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /examples/recaptchav3/async_solve.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | 4 | from playwright.async_api import async_playwright 5 | 6 | from playwright_recaptcha import recaptchav3 7 | 8 | 9 | async def main() -> None: 10 | async with async_playwright() as playwright: 11 | browser = await playwright.firefox.launch() 12 | page = await browser.new_page() 13 | 14 | async with recaptchav3.AsyncSolver(page) as solver: 15 | await page.goto("https://antcpt.com/score_detector/") 16 | token = await solver.solve_recaptcha() 17 | print(token) 18 | 19 | score_pattern = re.compile(r"Your score is: (\d\.\d)") 20 | score_locator = page.get_by_text(score_pattern) 21 | print(await score_locator.inner_text()) 22 | 23 | 24 | if __name__ == "__main__": 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /examples/recaptchav3/sync_solve.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from playwright.sync_api import sync_playwright 4 | 5 | from playwright_recaptcha import recaptchav3 6 | 7 | 8 | def main() -> None: 9 | with sync_playwright() as playwright: 10 | browser = playwright.firefox.launch() 11 | page = browser.new_page() 12 | 13 | with recaptchav3.SyncSolver(page) as solver: 14 | page.goto("https://antcpt.com/score_detector/") 15 | token = solver.solve_recaptcha() 16 | print(token) 17 | 18 | score_pattern = re.compile(r"Your score is: (\d\.\d)") 19 | score_locator = page.get_by_text(score_pattern) 20 | print(score_locator.inner_text()) 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /playwright_recaptcha/__init__.py: -------------------------------------------------------------------------------- 1 | """A library for solving reCAPTCHA v2 and v3 with Playwright.""" 2 | 3 | __author__ = "Xewdy444" 4 | __version__ = "0.5.1" 5 | __license__ = "MIT" 6 | 7 | from .errors import ( 8 | CapSolverError, 9 | RecaptchaError, 10 | RecaptchaNotFoundError, 11 | RecaptchaRateLimitError, 12 | RecaptchaSolveError, 13 | RecaptchaTimeoutError, 14 | ) 15 | from .recaptchav2 import AsyncSolver as AsyncSolverV2 16 | from .recaptchav2 import SyncSolver as SyncSolverV2 17 | from .recaptchav3 import AsyncSolver as AsyncSolverV3 18 | from .recaptchav3 import SyncSolver as SyncSolverV3 19 | 20 | __all__ = [ 21 | "CapSolverError", 22 | "RecaptchaError", 23 | "RecaptchaNotFoundError", 24 | "RecaptchaRateLimitError", 25 | "RecaptchaSolveError", 26 | "RecaptchaTimeoutError", 27 | "AsyncSolverV2", 28 | "SyncSolverV2", 29 | "AsyncSolverV3", 30 | "SyncSolverV3", 31 | ] 32 | -------------------------------------------------------------------------------- /playwright_recaptcha/errors.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | class CapSolverError(Exception): 5 | """An exception raised when the CapSolver API returns an error.""" 6 | 7 | def __init__(self, message: Optional[str] = None) -> None: 8 | super().__init__(message or "The CapSolver API returned an error.") 9 | 10 | 11 | class RecaptchaError(Exception): 12 | """Base class for reCAPTCHA exceptions.""" 13 | 14 | 15 | class RecaptchaNotFoundError(RecaptchaError): 16 | """An exception raised when the reCAPTCHA was not found.""" 17 | 18 | def __init__(self, message: Optional[str] = None) -> None: 19 | super().__init__(message or "The reCAPTCHA was not found.") 20 | 21 | 22 | class RecaptchaSolveError(RecaptchaError): 23 | """Base class for reCAPTCHA solve exceptions.""" 24 | 25 | def __init__(self, message: Optional[str] = None) -> None: 26 | super().__init__(message or "The reCAPTCHA could not be solved.") 27 | 28 | 29 | class RecaptchaRateLimitError(RecaptchaSolveError): 30 | """An exception raised when the reCAPTCHA rate limit has been exceeded.""" 31 | 32 | def __init__(self) -> None: 33 | super().__init__("The reCAPTCHA rate limit has been exceeded.") 34 | 35 | 36 | class RecaptchaTimeoutError(RecaptchaSolveError): 37 | """An exception raised when the reCAPTCHA solve timeout has been exceeded.""" 38 | 39 | def __init__(self) -> None: 40 | super().__init__("The reCAPTCHA solve timeout has been exceeded.") 41 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/__init__.py: -------------------------------------------------------------------------------- 1 | """reCAPTCHA v2 solver for Playwright.""" 2 | from .async_solver import AsyncSolver 3 | from .sync_solver import SyncSolver 4 | 5 | __all__ = ["AsyncSolver", "SyncSolver"] 6 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/async_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import base64 5 | import functools 6 | import re 7 | import time 8 | from concurrent.futures import ThreadPoolExecutor 9 | from datetime import datetime 10 | from io import BytesIO 11 | from json import JSONDecodeError 12 | from typing import Any, BinaryIO, Dict, List, Optional, Union 13 | from urllib.parse import parse_qs, urlparse 14 | 15 | import speech_recognition 16 | from playwright.async_api import Locator, Page, Response 17 | from pydub import AudioSegment 18 | from pydub.exceptions import CouldntDecodeError 19 | from tenacity import ( 20 | AsyncRetrying, 21 | retry_if_exception_type, 22 | stop_after_delay, 23 | wait_fixed, 24 | ) 25 | 26 | from ..errors import ( 27 | CapSolverError, 28 | RecaptchaNotFoundError, 29 | RecaptchaRateLimitError, 30 | RecaptchaSolveError, 31 | ) 32 | from .base_solver import BaseSolver 33 | from .recaptcha_box import AsyncRecaptchaBox 34 | from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO 35 | 36 | 37 | class AsyncAudioFile(speech_recognition.AudioFile): 38 | """ 39 | A subclass of `speech_recognition.AudioFile` that can be used asynchronously. 40 | 41 | Parameters 42 | ---------- 43 | file : Union[BinaryIO, str] 44 | The audio file handle or file path. 45 | executor : Optional[ThreadPoolExecutor], optional 46 | The thread pool executor to use, by default None. 47 | """ 48 | 49 | def __init__( 50 | self, 51 | file: Union[BinaryIO, str], 52 | *, 53 | executor: Optional[ThreadPoolExecutor] = None, 54 | ) -> None: 55 | super().__init__(file) 56 | self._loop = asyncio.get_event_loop() 57 | self._executor = executor 58 | 59 | async def __aenter__(self) -> AsyncAudioFile: 60 | await self._loop.run_in_executor(self._executor, self.__enter__) 61 | return self 62 | 63 | async def __aexit__(self, *args: Any) -> None: 64 | await self._loop.run_in_executor(self._executor, self.__exit__, *args) 65 | 66 | 67 | class AsyncSolver(BaseSolver[Page]): 68 | """ 69 | A class for solving reCAPTCHA v2 asynchronously with Playwright. 70 | 71 | Parameters 72 | ---------- 73 | page : Page 74 | The Playwright page to solve the reCAPTCHA on. 75 | attempts : int, optional 76 | The number of solve attempts, by default 5. 77 | capsolver_api_key : Optional[str], optional 78 | The CapSolver API key, by default None. 79 | If None, the `CAPSOLVER_API_KEY` environment variable will be used. 80 | """ 81 | 82 | async def __aenter__(self) -> AsyncSolver: 83 | return self 84 | 85 | async def __aexit__(self, *_: Any) -> None: 86 | self.close() 87 | 88 | @staticmethod 89 | async def _get_task_object(recaptcha_box: AsyncRecaptchaBox) -> Optional[str]: 90 | """ 91 | Get the ID of the object in the reCAPTCHA image challenge task. 92 | 93 | Parameters 94 | ---------- 95 | recaptcha_box : AsyncRecaptchaBox 96 | The reCAPTCHA box. 97 | 98 | Returns 99 | ------- 100 | Optional[str] 101 | The object ID. Returns None if the task object is not recognized. 102 | """ 103 | object_dict = { 104 | "/m/0pg52": OBJECT_TRANSLATIONS["taxis"], 105 | "/m/01bjv": OBJECT_TRANSLATIONS["bus"], 106 | "/m/04_sv": OBJECT_TRANSLATIONS["motorcycles"], 107 | "/m/013xlm": OBJECT_TRANSLATIONS["tractors"], 108 | "/m/01jk_4": OBJECT_TRANSLATIONS["chimneys"], 109 | "/m/014xcs": OBJECT_TRANSLATIONS["crosswalks"], 110 | "/m/015qff": OBJECT_TRANSLATIONS["traffic_lights"], 111 | "/m/0199g": OBJECT_TRANSLATIONS["bicycles"], 112 | "/m/015qbp": OBJECT_TRANSLATIONS["parking_meters"], 113 | "/m/0k4j": OBJECT_TRANSLATIONS["cars"], 114 | "/m/015kr": OBJECT_TRANSLATIONS["bridges"], 115 | "/m/019jd": OBJECT_TRANSLATIONS["boats"], 116 | "/m/0cdl1": OBJECT_TRANSLATIONS["palm_trees"], 117 | "/m/09d_r": OBJECT_TRANSLATIONS["mountains_or_hills"], 118 | "/m/01pns0": OBJECT_TRANSLATIONS["fire_hydrant"], 119 | "/m/01lynh": OBJECT_TRANSLATIONS["stairs"], 120 | } 121 | 122 | task = await recaptcha_box.bframe_frame.locator("div").all_inner_texts() 123 | object_ = task[0].split("\n")[1] 124 | 125 | for object_id, translations in object_dict.items(): 126 | if object_ in translations: 127 | return object_id 128 | 129 | return None 130 | 131 | async def _response_callback(self, response: Response) -> None: 132 | """ 133 | The callback for intercepting payload and userverify responses. 134 | 135 | Parameters 136 | ---------- 137 | response : Response 138 | The response. 139 | """ 140 | if ( 141 | re.search("/recaptcha/(api2|enterprise)/payload", response.url) is not None 142 | and self._payload_response is None 143 | ): 144 | self._payload_response = response 145 | elif ( 146 | re.search("/recaptcha/(api2|enterprise)/userverify", response.url) 147 | is not None 148 | ): 149 | token_match = re.search('"uvresp","(.*?)"', await response.text()) 150 | 151 | if token_match is not None: 152 | self._token = token_match.group(1) 153 | 154 | async def _get_capsolver_response( 155 | self, recaptcha_box: AsyncRecaptchaBox, image_data: bytes 156 | ) -> Optional[Dict[str, Any]]: 157 | """ 158 | Get the CapSolver JSON response for an image. 159 | 160 | Parameters 161 | ---------- 162 | recaptcha_box : AsyncRecaptchaBox 163 | The reCAPTCHA box. 164 | image_data : bytes 165 | The image data. 166 | 167 | Returns 168 | ------- 169 | Optional[Dict[str, Any]] 170 | The CapSolver JSON response. 171 | Returns None if the task object is not recognized. 172 | 173 | Raises 174 | ------ 175 | CapSolverError 176 | If the CapSolver API returned an error. 177 | """ 178 | image = base64.b64encode(image_data).decode("utf-8") 179 | task_object = await self._get_task_object(recaptcha_box) 180 | 181 | if task_object is None: 182 | return None 183 | 184 | payload = { 185 | "clientKey": self._capsolver_api_key, 186 | "task": { 187 | "type": "ReCaptchaV2Classification", 188 | "image": image, 189 | "question": task_object, 190 | }, 191 | } 192 | 193 | response = await self._page.request.post( 194 | "https://api.capsolver.com/createTask", data=payload 195 | ) 196 | 197 | try: 198 | response_json = await response.json() 199 | except JSONDecodeError as err: 200 | raise CapSolverError from err 201 | 202 | if response_json["errorId"] != 0: 203 | raise CapSolverError(response_json["errorDescription"]) 204 | 205 | return response_json 206 | 207 | async def _solve_tiles( 208 | self, recaptcha_box: AsyncRecaptchaBox, indexes: List[int] 209 | ) -> None: 210 | """ 211 | Solve the tiles in the reCAPTCHA image challenge. 212 | 213 | Parameters 214 | ---------- 215 | recaptcha_box : AsyncRecaptchaBox 216 | The reCAPTCHA box. 217 | indexes : List[int] 218 | The indexes of the tiles that contain the task object. 219 | 220 | Raises 221 | ------ 222 | CapSolverError 223 | If the CapSolver API returned an error. 224 | """ 225 | changing_tiles: Dict[Locator, str] = {} 226 | indexes = indexes.copy() 227 | 228 | style_script = """ 229 | (element) => { 230 | element.style = ""; 231 | element.className = "rc-imageselect-tile"; 232 | } 233 | """ 234 | 235 | for index in indexes: 236 | tile = recaptcha_box.tile_selector.nth(index) 237 | await tile.click() 238 | 239 | if "rc-imageselect-dynamic-selected" not in await tile.get_attribute( 240 | "class" 241 | ): 242 | continue 243 | 244 | changing_tiles[tile] = await tile.locator("img").get_attribute("src") 245 | await tile.evaluate(style_script) 246 | 247 | start_time = datetime.now() 248 | 249 | while changing_tiles and (datetime.now() - start_time).seconds < 60: 250 | for tile in changing_tiles.copy(): 251 | image_url = await tile.locator("img").get_attribute("src") 252 | 253 | if changing_tiles[tile] == image_url: 254 | continue 255 | 256 | changing_tiles[tile] = image_url 257 | response = await self._page.request.get(image_url) 258 | 259 | capsolver_response = await self._get_capsolver_response( 260 | recaptcha_box, await response.body() 261 | ) 262 | 263 | if ( 264 | capsolver_response is None 265 | or not capsolver_response["solution"]["hasObject"] 266 | ): 267 | changing_tiles.pop(tile) 268 | continue 269 | 270 | await tile.click() 271 | await tile.evaluate(style_script) 272 | 273 | async def _transcribe_audio( 274 | self, audio_url: str, *, language: str = "en-US" 275 | ) -> Optional[str]: 276 | """ 277 | Transcribe the reCAPTCHA audio challenge. 278 | 279 | Parameters 280 | ---------- 281 | audio_url : str 282 | The reCAPTCHA audio URL. 283 | language : str, optional 284 | The language of the audio, by default en-US. 285 | 286 | Returns 287 | ------- 288 | Optional[str] 289 | The reCAPTCHA audio text. 290 | Returns None if the audio could not be converted. 291 | """ 292 | loop = asyncio.get_event_loop() 293 | response = await self._page.request.get(audio_url) 294 | 295 | wav_audio = BytesIO() 296 | mp3_audio = BytesIO(await response.body()) 297 | 298 | try: 299 | audio: AudioSegment = await loop.run_in_executor( 300 | None, AudioSegment.from_mp3, mp3_audio 301 | ) 302 | except CouldntDecodeError: 303 | return None 304 | 305 | await loop.run_in_executor( 306 | None, functools.partial(audio.export, wav_audio, format="wav") 307 | ) 308 | 309 | recognizer = speech_recognition.Recognizer() 310 | 311 | async with AsyncAudioFile(wav_audio) as source: 312 | audio_data = await loop.run_in_executor(None, recognizer.record, source) 313 | 314 | try: 315 | return await loop.run_in_executor( 316 | None, 317 | functools.partial( 318 | recognizer.recognize_google, audio_data, language=language 319 | ), 320 | ) 321 | except speech_recognition.UnknownValueError: 322 | return None 323 | 324 | async def _click_checkbox(self, recaptcha_box: AsyncRecaptchaBox) -> None: 325 | """ 326 | Click the reCAPTCHA checkbox. 327 | 328 | Parameters 329 | ---------- 330 | recaptcha_box : AsyncRecaptchaBox 331 | The reCAPTCHA box. 332 | 333 | Raises 334 | ------ 335 | RecaptchaRateLimitError 336 | If the reCAPTCHA rate limit has been exceeded. 337 | """ 338 | await recaptcha_box.checkbox.click() 339 | 340 | while recaptcha_box.frames_are_attached() and self._token is None: 341 | if await recaptcha_box.rate_limit_is_visible(): 342 | raise RecaptchaRateLimitError 343 | 344 | if await recaptcha_box.any_challenge_is_visible(): 345 | return 346 | 347 | await self._page.wait_for_timeout(250) 348 | 349 | async def _get_audio_url(self, recaptcha_box: AsyncRecaptchaBox) -> str: 350 | """ 351 | Get the reCAPTCHA audio URL. 352 | 353 | Parameters 354 | ---------- 355 | recaptcha_box : AsyncRecaptchaBox 356 | The reCAPTCHA box. 357 | 358 | Returns 359 | ------- 360 | str 361 | The reCAPTCHA audio URL. 362 | 363 | Raises 364 | ------ 365 | RecaptchaRateLimitError 366 | If the reCAPTCHA rate limit has been exceeded. 367 | """ 368 | while True: 369 | if await recaptcha_box.rate_limit_is_visible(): 370 | raise RecaptchaRateLimitError 371 | 372 | if await recaptcha_box.audio_challenge_is_visible(): 373 | return await recaptcha_box.audio_download_button.get_attribute("href") 374 | 375 | await self._page.wait_for_timeout(250) 376 | 377 | async def _submit_audio_text( 378 | self, recaptcha_box: AsyncRecaptchaBox, text: str 379 | ) -> None: 380 | """ 381 | Submit the reCAPTCHA audio text. 382 | 383 | Parameters 384 | ---------- 385 | recaptcha_box : AsyncRecaptchaBox 386 | The reCAPTCHA box. 387 | text : str 388 | The reCAPTCHA audio text. 389 | 390 | Raises 391 | ------ 392 | RecaptchaRateLimitError 393 | If the reCAPTCHA rate limit has been exceeded. 394 | """ 395 | await recaptcha_box.audio_challenge_textbox.fill(text) 396 | 397 | async with self._page.expect_response( 398 | re.compile("/recaptcha/(api2|enterprise)/userverify") 399 | ) as response: 400 | await recaptcha_box.verify_button.click() 401 | 402 | await response.value 403 | 404 | while recaptcha_box.frames_are_attached(): 405 | if await recaptcha_box.rate_limit_is_visible(): 406 | raise RecaptchaRateLimitError 407 | 408 | if ( 409 | not await recaptcha_box.audio_challenge_is_visible() 410 | or await recaptcha_box.solve_failure_is_visible() 411 | or await recaptcha_box.challenge_is_solved() 412 | ): 413 | return 414 | 415 | await self._page.wait_for_timeout(250) 416 | 417 | async def _submit_tile_answers(self, recaptcha_box: AsyncRecaptchaBox) -> None: 418 | """ 419 | Submit the reCAPTCHA image challenge tile answers. 420 | 421 | Parameters 422 | ---------- 423 | recaptcha_box : AsyncRecaptchaBox 424 | The reCAPTCHA box. 425 | 426 | Raises 427 | ------ 428 | RecaptchaRateLimitError 429 | If the reCAPTCHA rate limit has been exceeded. 430 | """ 431 | await recaptcha_box.verify_button.click() 432 | 433 | while recaptcha_box.frames_are_attached(): 434 | if await recaptcha_box.rate_limit_is_visible(): 435 | raise RecaptchaRateLimitError 436 | 437 | if ( 438 | await recaptcha_box.challenge_is_solved() 439 | or await recaptcha_box.try_again_is_visible() 440 | ): 441 | return 442 | 443 | if ( 444 | await recaptcha_box.check_new_images_is_visible() 445 | or await recaptcha_box.select_all_matching_is_visible() 446 | ): 447 | async with self._page.expect_response( 448 | re.compile("/recaptcha/(api2|enterprise)/payload") 449 | ) as response: 450 | await recaptcha_box.new_challenge_button.click() 451 | 452 | await response.value 453 | return 454 | 455 | await self._page.wait_for_timeout(250) 456 | 457 | async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None: 458 | """ 459 | Solve the reCAPTCHA image challenge. 460 | 461 | Parameters 462 | ---------- 463 | recaptcha_box : AsyncRecaptchaBox 464 | The reCAPTCHA box. 465 | 466 | Raises 467 | ------ 468 | CapSolverError 469 | If the CapSolver API returned an error. 470 | RecaptchaRateLimitError 471 | If the reCAPTCHA rate limit has been exceeded. 472 | """ 473 | while recaptcha_box.frames_are_attached(): 474 | capsolver_response = await self._get_capsolver_response( 475 | recaptcha_box, await self._payload_response.body() 476 | ) 477 | 478 | if ( 479 | capsolver_response is None 480 | or not capsolver_response["solution"]["objects"] 481 | ): 482 | self._payload_response = None 483 | 484 | async with self._page.expect_response( 485 | re.compile("/recaptcha/(api2|enterprise)/reload") 486 | ) as response: 487 | await recaptcha_box.new_challenge_button.click() 488 | 489 | await response.value 490 | 491 | while self._payload_response is None: 492 | if await recaptcha_box.rate_limit_is_visible(): 493 | raise RecaptchaRateLimitError 494 | 495 | await self._page.wait_for_timeout(250) 496 | 497 | continue 498 | 499 | await self._solve_tiles( 500 | recaptcha_box, capsolver_response["solution"]["objects"] 501 | ) 502 | 503 | self._payload_response = None 504 | button = recaptcha_box.skip_button.or_(recaptcha_box.next_button) 505 | 506 | if await button.is_hidden(): 507 | await self._submit_tile_answers(recaptcha_box) 508 | return 509 | 510 | async with self._page.expect_response( 511 | re.compile("/recaptcha/(api2|enterprise)/payload") 512 | ): 513 | await button.click() 514 | 515 | async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None: 516 | """ 517 | Solve the reCAPTCHA audio challenge. 518 | 519 | Parameters 520 | ---------- 521 | recaptcha_box : AsyncRecaptchaBox 522 | The reCAPTCHA box. 523 | 524 | Raises 525 | ------ 526 | RecaptchaRateLimitError 527 | If the reCAPTCHA rate limit has been exceeded. 528 | """ 529 | parsed_url = urlparse(recaptcha_box.anchor_frame.url) 530 | query_params = parse_qs(parsed_url.query) 531 | language = query_params["hl"][0] 532 | 533 | if language not in ORIGINAL_LANGUAGE_AUDIO: 534 | language = "en-US" 535 | 536 | while True: 537 | url = await self._get_audio_url(recaptcha_box) 538 | text = await self._transcribe_audio(url, language=language) 539 | 540 | if text is not None: 541 | break 542 | 543 | async with self._page.expect_response( 544 | re.compile("/recaptcha/(api2|enterprise)/reload") 545 | ) as response: 546 | await recaptcha_box.new_challenge_button.click() 547 | 548 | await response.value 549 | 550 | while url == await self._get_audio_url(recaptcha_box): 551 | await self._page.wait_for_timeout(250) 552 | 553 | await self._submit_audio_text(recaptcha_box, text) 554 | 555 | async def recaptcha_is_visible(self) -> bool: 556 | """ 557 | Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 558 | 559 | Returns 560 | ------- 561 | bool 562 | Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 563 | """ 564 | try: 565 | await AsyncRecaptchaBox.from_frames(self._page.frames) 566 | except RecaptchaNotFoundError: 567 | return False 568 | 569 | return True 570 | 571 | async def solve_recaptcha( 572 | self, 573 | *, 574 | attempts: Optional[int] = None, 575 | wait: bool = False, 576 | wait_timeout: float = 30, 577 | image_challenge: bool = False, 578 | ) -> str: 579 | """ 580 | Solve the reCAPTCHA and return the `g-recaptcha-response` token. 581 | 582 | Parameters 583 | ---------- 584 | attempts : Optional[int], optional 585 | The number of solve attempts, by default 5. 586 | wait : bool, optional 587 | Whether to wait for the reCAPTCHA to appear, by default False. 588 | wait_timeout : float, optional 589 | The amount of time in seconds to wait for the reCAPTCHA to appear, 590 | by default 30. Only used if `wait` is True. 591 | image_challenge : bool, optional 592 | Whether to solve the image challenge, by default False. 593 | 594 | Returns 595 | ------- 596 | str 597 | The `g-recaptcha-response` token. 598 | 599 | Raises 600 | ------ 601 | CapSolverError 602 | If the CapSolver API returned an error. 603 | RecaptchaNotFoundError 604 | If the reCAPTCHA was not found. 605 | RecaptchaRateLimitError 606 | If the reCAPTCHA rate limit has been exceeded. 607 | RecaptchaSolveError 608 | If the reCAPTCHA could not be solved. 609 | """ 610 | if image_challenge and self._capsolver_api_key is None: 611 | raise CapSolverError( 612 | "You must provide a CapSolver API key to solve image challenges." 613 | ) 614 | 615 | self._token = None 616 | attempts = attempts or self._attempts 617 | 618 | if wait: 619 | retry = AsyncRetrying( 620 | sleep=self._page.wait_for_timeout, 621 | stop=stop_after_delay(wait_timeout), 622 | wait=wait_fixed(0.25), 623 | retry=retry_if_exception_type(RecaptchaNotFoundError), 624 | reraise=True, 625 | ) 626 | 627 | recaptcha_box = await retry( 628 | lambda: AsyncRecaptchaBox.from_frames(self._page.frames) 629 | ) 630 | else: 631 | recaptcha_box = await AsyncRecaptchaBox.from_frames(self._page.frames) 632 | 633 | if await recaptcha_box.rate_limit_is_visible(): 634 | raise RecaptchaRateLimitError 635 | 636 | if await recaptcha_box.checkbox.is_visible(): 637 | click_timestamp = time.time() 638 | await self._click_checkbox(recaptcha_box) 639 | 640 | if self._token is not None: 641 | return self._token 642 | 643 | if ( 644 | recaptcha_box.frames_are_detached() 645 | or not await recaptcha_box.any_challenge_is_visible() 646 | or await recaptcha_box.challenge_is_solved() 647 | ): 648 | while self._token is None: 649 | await self._page.wait_for_timeout(250) 650 | 651 | return self._token 652 | 653 | time_to_wait = max(1 - (time.time() - click_timestamp), 0) 654 | await self._page.wait_for_timeout(time_to_wait * 1000) 655 | 656 | while not await recaptcha_box.any_challenge_is_visible(): 657 | await self._page.wait_for_timeout(250) 658 | 659 | if image_challenge and await recaptcha_box.image_challenge_button.is_visible(): 660 | await recaptcha_box.image_challenge_button.click() 661 | elif ( 662 | not image_challenge 663 | and await recaptcha_box.audio_challenge_button.is_visible() 664 | ): 665 | await recaptcha_box.audio_challenge_button.click() 666 | 667 | if image_challenge: 668 | image = recaptcha_box.image_challenge.locator("img").first 669 | image_url = await image.get_attribute("src") 670 | self._payload_response = await self._page.request.get(image_url) 671 | 672 | while attempts > 0: 673 | self._token = None 674 | 675 | if image_challenge: 676 | await self._solve_image_challenge(recaptcha_box) 677 | else: 678 | await self._solve_audio_challenge(recaptcha_box) 679 | 680 | if ( 681 | recaptcha_box.frames_are_detached() 682 | or not await recaptcha_box.any_challenge_is_visible() 683 | or await recaptcha_box.challenge_is_solved() 684 | ): 685 | while self._token is None: 686 | await self._page.wait_for_timeout(250) 687 | 688 | return self._token 689 | 690 | attempts -= 1 691 | 692 | raise RecaptchaSolveError 693 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/base_solver.py: -------------------------------------------------------------------------------- 1 | import os 2 | from abc import ABC, abstractmethod 3 | from typing import Any, Dict, Generic, Iterable, Optional, TypeVar, Union 4 | 5 | from playwright.async_api import APIResponse as AsyncAPIResponse 6 | from playwright.async_api import Page as AsyncPage 7 | from playwright.async_api import Response as AsyncResponse 8 | from playwright.sync_api import APIResponse as SyncAPIResponse 9 | from playwright.sync_api import Page as SyncPage 10 | from playwright.sync_api import Response as SyncResponse 11 | 12 | from .recaptcha_box import RecaptchaBox 13 | 14 | PageT = TypeVar("PageT", AsyncPage, SyncPage) 15 | APIResponse = Union[AsyncAPIResponse, SyncAPIResponse] 16 | Response = Union[AsyncResponse, SyncResponse] 17 | 18 | 19 | class BaseSolver(ABC, Generic[PageT]): 20 | """ 21 | The base class for reCAPTCHA v2 solvers. 22 | 23 | Parameters 24 | ---------- 25 | page : PageT 26 | The Playwright page to solve the reCAPTCHA on. 27 | attempts : int, optional 28 | The number of solve attempts, by default 5. 29 | capsolver_api_key : Optional[str], optional 30 | The CapSolver API key, by default None. 31 | If None, the `CAPSOLVER_API_KEY` environment variable will be used. 32 | """ 33 | 34 | def __init__( 35 | self, page: PageT, *, attempts: int = 5, capsolver_api_key: Optional[str] = None 36 | ) -> None: 37 | self._page = page 38 | self._attempts = attempts 39 | self._capsolver_api_key = capsolver_api_key or os.getenv("CAPSOLVER_API_KEY") 40 | 41 | self._token: Optional[str] = None 42 | self._payload_response: Union[APIResponse, Response, None] = None 43 | self._page.on("response", self._response_callback) 44 | 45 | def __repr__(self) -> str: 46 | return ( 47 | f"{self.__class__.__name__}(page={self._page!r}, " 48 | f"attempts={self._attempts!r}, " 49 | f"capsolver_api_key={self._capsolver_api_key!r})" 50 | ) 51 | 52 | def close(self) -> None: 53 | """Remove the response listener.""" 54 | try: 55 | self._page.remove_listener("response", self._response_callback) 56 | except KeyError: 57 | pass 58 | 59 | @staticmethod 60 | @abstractmethod 61 | def _get_task_object(recaptcha_box: RecaptchaBox) -> Optional[str]: 62 | """ 63 | Get the ID of the object in the reCAPTCHA image challenge task. 64 | 65 | Parameters 66 | ---------- 67 | recaptcha_box : RecaptchaBox 68 | The reCAPTCHA box. 69 | 70 | Returns 71 | ------- 72 | Optional[str] 73 | The object ID. Returns None if the task object is not recognized. 74 | """ 75 | 76 | @abstractmethod 77 | def _response_callback(self, response: Response) -> None: 78 | """ 79 | The callback for intercepting payload and userverify responses. 80 | 81 | Parameters 82 | ---------- 83 | response : Response 84 | The response. 85 | """ 86 | 87 | @abstractmethod 88 | def _get_capsolver_response( 89 | self, recaptcha_box: RecaptchaBox, image_data: bytes 90 | ) -> Optional[Dict[str, Any]]: 91 | """ 92 | Get the CapSolver JSON response for an image. 93 | 94 | Parameters 95 | ---------- 96 | recaptcha_box : RecaptchaBox 97 | The reCAPTCHA box. 98 | image_data : bytes 99 | The image data. 100 | 101 | Returns 102 | ------- 103 | Optional[Dict[str, Any]] 104 | The CapSolver JSON response. 105 | Returns None if the task object is not recognized. 106 | 107 | Raises 108 | ------ 109 | CapSolverError 110 | If the CapSolver API returned an error. 111 | """ 112 | 113 | @abstractmethod 114 | def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> None: 115 | """ 116 | Solve the tiles in the reCAPTCHA image challenge. 117 | 118 | Parameters 119 | ---------- 120 | recaptcha_box : RecaptchaBox 121 | The reCAPTCHA box. 122 | indexes : Iterable[int] 123 | The indexes of the tiles that contain the task object. 124 | 125 | Raises 126 | ------ 127 | CapSolverError 128 | If the CapSolver API returned an error. 129 | """ 130 | 131 | @abstractmethod 132 | def _transcribe_audio(self, audio_url: str, *, language: str) -> Optional[str]: 133 | """ 134 | Transcribe the reCAPTCHA audio challenge. 135 | 136 | Parameters 137 | ---------- 138 | audio_url : str 139 | The reCAPTCHA audio URL. 140 | language : str 141 | The language of the audio. 142 | 143 | Returns 144 | ------- 145 | Optional[str] 146 | The reCAPTCHA audio text. 147 | Returns None if the audio could not be converted. 148 | """ 149 | 150 | @abstractmethod 151 | def _click_checkbox(self, recaptcha_box: RecaptchaBox) -> None: 152 | """ 153 | Click the reCAPTCHA checkbox. 154 | 155 | Parameters 156 | ---------- 157 | recaptcha_box : RecaptchaBox 158 | The reCAPTCHA box. 159 | 160 | Raises 161 | ------ 162 | RecaptchaRateLimitError 163 | If the reCAPTCHA rate limit has been exceeded. 164 | """ 165 | 166 | @abstractmethod 167 | def _get_audio_url(self, recaptcha_box: RecaptchaBox) -> str: 168 | """ 169 | Get the reCAPTCHA audio URL. 170 | 171 | Parameters 172 | ---------- 173 | recaptcha_box : RecaptchaBox 174 | The reCAPTCHA box. 175 | 176 | Returns 177 | ------- 178 | str 179 | The reCAPTCHA audio URL. 180 | 181 | Raises 182 | ------ 183 | RecaptchaRateLimitError 184 | If the reCAPTCHA rate limit has been exceeded. 185 | """ 186 | 187 | @abstractmethod 188 | def _submit_audio_text(self, recaptcha_box: RecaptchaBox, text: str) -> None: 189 | """ 190 | Submit the reCAPTCHA audio text. 191 | 192 | Parameters 193 | ---------- 194 | recaptcha_box : RecaptchaBox 195 | The reCAPTCHA box. 196 | text : str 197 | The reCAPTCHA audio text. 198 | 199 | Raises 200 | ------ 201 | RecaptchaRateLimitError 202 | If the reCAPTCHA rate limit has been exceeded. 203 | """ 204 | 205 | @abstractmethod 206 | def _submit_tile_answers(self, recaptcha_box: RecaptchaBox) -> None: 207 | """ 208 | Submit the reCAPTCHA image challenge tile answers. 209 | 210 | Parameters 211 | ---------- 212 | recaptcha_box : RecaptchaBox 213 | The reCAPTCHA box. 214 | 215 | Raises 216 | ------ 217 | RecaptchaRateLimitError 218 | If the reCAPTCHA rate limit has been exceeded. 219 | """ 220 | 221 | @abstractmethod 222 | def _solve_image_challenge(self, recaptcha_box: RecaptchaBox) -> None: 223 | """ 224 | Solve the reCAPTCHA image challenge. 225 | 226 | Parameters 227 | ---------- 228 | recaptcha_box : RecaptchaBox 229 | The reCAPTCHA box. 230 | 231 | Raises 232 | ------ 233 | CapSolverError 234 | If the CapSolver API returned an error. 235 | RecaptchaRateLimitError 236 | If the reCAPTCHA rate limit has been exceeded. 237 | """ 238 | 239 | @abstractmethod 240 | def _solve_audio_challenge(self, recaptcha_box: RecaptchaBox) -> None: 241 | """ 242 | Solve the reCAPTCHA audio challenge. 243 | 244 | Parameters 245 | ---------- 246 | recaptcha_box : RecaptchaBox 247 | The reCAPTCHA box. 248 | 249 | Raises 250 | ------ 251 | RecaptchaRateLimitError 252 | If the reCAPTCHA rate limit has been exceeded. 253 | """ 254 | 255 | @abstractmethod 256 | def recaptcha_is_visible(self) -> bool: 257 | """ 258 | Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 259 | 260 | Returns 261 | ------- 262 | bool 263 | Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 264 | """ 265 | 266 | @abstractmethod 267 | def solve_recaptcha( 268 | self, 269 | *, 270 | attempts: Optional[int] = None, 271 | wait: bool = False, 272 | wait_timeout: float = 30, 273 | image_challenge: bool = False, 274 | ) -> str: 275 | """ 276 | Solve the reCAPTCHA and return the `g-recaptcha-response` token. 277 | 278 | Parameters 279 | ---------- 280 | attempts : Optional[int], optional 281 | The number of solve attempts, by default 5. 282 | wait : bool, optional 283 | Whether to wait for the reCAPTCHA to appear, by default False. 284 | wait_timeout : float, optional 285 | The amount of time in seconds to wait for the reCAPTCHA to appear, 286 | by default 30. Only used if `wait` is True. 287 | image_challenge : bool, optional 288 | Whether to solve the image challenge, by default False. 289 | 290 | Returns 291 | ------- 292 | str 293 | The `g-recaptcha-response` token. 294 | 295 | Raises 296 | ------ 297 | CapSolverError 298 | If the CapSolver API returned an error. 299 | RecaptchaNotFoundError 300 | If the reCAPTCHA was not found. 301 | RecaptchaRateLimitError 302 | If the reCAPTCHA rate limit has been exceeded. 303 | RecaptchaSolveError 304 | If the reCAPTCHA could not be solved. 305 | """ 306 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/recaptcha_box.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from abc import ABC, abstractmethod 5 | from functools import wraps 6 | from typing import Generic, Iterable, List, Pattern, Tuple, TypeVar, Union 7 | 8 | from playwright.async_api import Frame as AsyncFrame 9 | from playwright.async_api import Locator as AsyncLocator 10 | from playwright.sync_api import Frame as SyncFrame 11 | from playwright.sync_api import Locator as SyncLocator 12 | 13 | from ..errors import RecaptchaNotFoundError 14 | from .translations import ELEMENT_TRANSLATIONS 15 | 16 | FrameT = TypeVar("FrameT", AsyncFrame, SyncFrame) 17 | Locator = Union[AsyncLocator, SyncLocator] 18 | 19 | 20 | class RecaptchaBox(ABC, Generic[FrameT]): 21 | """ 22 | The base class for reCAPTCHA v2 boxes. 23 | 24 | Parameters 25 | ---------- 26 | anchor_frame : FrameT 27 | The reCAPTCHA anchor frame. 28 | bframe_frame : FrameT 29 | The reCAPTCHA bframe frame. 30 | """ 31 | 32 | def __init__(self, anchor_frame: FrameT, bframe_frame: FrameT) -> None: 33 | self._anchor_frame = anchor_frame 34 | self._bframe_frame = bframe_frame 35 | 36 | def __repr__(self) -> str: 37 | return ( 38 | f"{self.__class__.__name__}(anchor_frame={self._anchor_frame!r}, " 39 | f"bframe_frame={self._bframe_frame!r})" 40 | ) 41 | 42 | @staticmethod 43 | def _get_recaptcha_frame_pairs( 44 | frames: Iterable[FrameT], 45 | ) -> List[Tuple[FrameT, FrameT]]: 46 | """ 47 | Get the reCAPTCHA anchor and bframe frame pairs. 48 | 49 | Parameters 50 | ---------- 51 | frames : Iterable[FrameT] 52 | A list of frames to search for the reCAPTCHA anchor and bframe frames. 53 | 54 | Returns 55 | ------- 56 | List[Tuple[FrameT, FrameT]] 57 | A list of reCAPTCHA anchor and bframe frame pairs. 58 | 59 | Raises 60 | ------ 61 | RecaptchaNotFoundError 62 | If no reCAPTCHA anchor and bframe frame pairs were found. 63 | """ 64 | anchor_frames = [ 65 | frame 66 | for frame in frames 67 | if re.search("/recaptcha/(api2|enterprise)/anchor", frame.url) is not None 68 | ] 69 | 70 | bframe_frames = [ 71 | frame 72 | for frame in frames 73 | if re.search("/recaptcha/(api2|enterprise)/bframe", frame.url) is not None 74 | ] 75 | 76 | frame_pairs = [] 77 | 78 | for anchor_frame in anchor_frames: 79 | frame_id = anchor_frame.name[2:] 80 | 81 | for bframe_frame in bframe_frames: 82 | if frame_id not in bframe_frame.name: 83 | continue 84 | 85 | frame_pairs.append((anchor_frame, bframe_frame)) 86 | 87 | if not frame_pairs: 88 | raise RecaptchaNotFoundError 89 | 90 | return frame_pairs 91 | 92 | @staticmethod 93 | def _get_translations_pattern(translations: Iterable[str]) -> Pattern: 94 | """ 95 | Get a compiled regex pattern from a list of translations. 96 | 97 | Parameters 98 | ---------- 99 | translations : Iterable[str] 100 | A list of translations to compile into a regex pattern. 101 | 102 | Returns 103 | ------- 104 | Pattern 105 | The compiled regex pattern. 106 | """ 107 | escaped_translations = [re.escape(translation) for translation in translations] 108 | return re.compile(f'^({"|".join(escaped_translations)}).?$') 109 | 110 | @property 111 | def checkbox(self) -> Locator: 112 | """The reCAPTCHA checkbox locator.""" 113 | return self.anchor_frame.get_by_role( 114 | "checkbox", 115 | name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["im_not_a_robot"]), 116 | ) 117 | 118 | @property 119 | def audio_challenge_button(self) -> Locator: 120 | """The reCAPTCHA audio challenge button locator.""" 121 | return self.bframe_frame.get_by_role( 122 | "button", 123 | name=self._get_translations_pattern( 124 | ELEMENT_TRANSLATIONS["get_an_audio_challenge"] 125 | ), 126 | ) 127 | 128 | @property 129 | def image_challenge_button(self) -> Locator: 130 | """The reCAPTCHA image challenge button locator.""" 131 | return self.bframe_frame.get_by_role( 132 | "button", 133 | name=self._get_translations_pattern( 134 | ELEMENT_TRANSLATIONS["get_a_visual_challenge"] 135 | ), 136 | ) 137 | 138 | @property 139 | def new_challenge_button(self) -> Locator: 140 | """The reCAPTCHA new challenge button locator.""" 141 | return self.bframe_frame.get_by_role( 142 | "button", 143 | name=self._get_translations_pattern( 144 | ELEMENT_TRANSLATIONS["get_a_new_challenge"] 145 | ), 146 | ) 147 | 148 | @property 149 | def audio_download_button(self) -> Locator: 150 | """The reCAPTCHA audio download button locator.""" 151 | return self.bframe_frame.get_by_role( 152 | "link", 153 | name=self._get_translations_pattern( 154 | ELEMENT_TRANSLATIONS["alternatively_download_audio_as_mp3"] 155 | ), 156 | ) 157 | 158 | @property 159 | def audio_challenge_textbox(self) -> Locator: 160 | """The reCAPTCHA audio challenge textbox locator.""" 161 | return self.bframe_frame.get_by_role( 162 | "textbox", 163 | name=self._get_translations_pattern( 164 | ELEMENT_TRANSLATIONS["enter_what_you_hear"] 165 | ), 166 | ) 167 | 168 | @property 169 | def skip_button(self) -> Locator: 170 | """The reCAPTCHA skip button locator.""" 171 | return self.bframe_frame.get_by_role( 172 | "button", name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["skip"]) 173 | ) 174 | 175 | @property 176 | def next_button(self) -> Locator: 177 | """The reCAPTCHA next button locator.""" 178 | return self.bframe_frame.get_by_role( 179 | "button", name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["next"]) 180 | ) 181 | 182 | @property 183 | def verify_button(self) -> Locator: 184 | """The reCAPTCHA verify button locator.""" 185 | return self.bframe_frame.get_by_role( 186 | "button", 187 | name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["verify"]), 188 | ) 189 | 190 | @property 191 | def tile_selector(self) -> Locator: 192 | """The reCAPTCHA tile selector locator.""" 193 | return self.bframe_frame.locator(".rc-imageselect-tile") 194 | 195 | @property 196 | def image_challenge(self) -> Locator: 197 | """The reCAPTCHA image challenge locator.""" 198 | return self.bframe_frame.locator(".rc-imageselect-challenge") 199 | 200 | def frames_are_attached(self) -> bool: 201 | """ 202 | Check if all of the reCAPTCHA frames are attached. 203 | 204 | Returns 205 | ------- 206 | bool 207 | True if all of the reCAPTCHA frames are attached, False otherwise. 208 | """ 209 | return not self.frames_are_detached() 210 | 211 | def frames_are_detached(self) -> bool: 212 | """ 213 | Check if any of the reCAPTCHA frames are detached. 214 | 215 | Returns 216 | ------- 217 | bool 218 | True if any of the reCAPTCHA frames are detached, False otherwise. 219 | """ 220 | return self.anchor_frame.is_detached() or self.bframe_frame.is_detached() 221 | 222 | @abstractmethod 223 | def _check_if_attached(func): 224 | """ 225 | A decorator for checking if the reCAPTCHA frames are attached 226 | before running the decorated function. 227 | """ 228 | 229 | @property 230 | @abstractmethod 231 | def anchor_frame(self) -> FrameT: 232 | """The reCAPTCHA anchor frame.""" 233 | 234 | @property 235 | @abstractmethod 236 | def bframe_frame(self) -> FrameT: 237 | """The reCAPTCHA bframe frame.""" 238 | 239 | @classmethod 240 | @abstractmethod 241 | def from_frames(cls, frames: Iterable[FrameT]) -> RecaptchaBox: 242 | """ 243 | Create a reCAPTCHA box using a list of frames. 244 | 245 | Parameters 246 | ---------- 247 | frames : Iterable[FrameT] 248 | A list of frames to search for the reCAPTCHA frames. 249 | 250 | Returns 251 | ------- 252 | RecaptchaBox 253 | The reCAPTCHA box. 254 | 255 | Raises 256 | ------ 257 | RecaptchaNotFoundError 258 | If the reCAPTCHA frames were not found 259 | or if no unchecked reCAPTCHA boxes were found. 260 | """ 261 | 262 | @abstractmethod 263 | def rate_limit_is_visible(self) -> bool: 264 | """ 265 | Check if the reCAPTCHA rate limit message is visible. 266 | 267 | Returns 268 | ------- 269 | bool 270 | True if the reCAPTCHA rate limit message is visible, False otherwise. 271 | """ 272 | 273 | @abstractmethod 274 | def solve_failure_is_visible(self) -> bool: 275 | """ 276 | Check if the reCAPTCHA solve failure message is visible. 277 | 278 | Returns 279 | ------- 280 | bool 281 | True if the reCAPTCHA solve failure message is visible, False otherwise. 282 | """ 283 | 284 | @abstractmethod 285 | def image_challenge_is_visible(self) -> bool: 286 | """ 287 | Check if the reCAPTCHA image challenge is visible. 288 | 289 | Returns 290 | ------- 291 | bool 292 | True if the reCAPTCHA challenge is visible, False otherwise. 293 | """ 294 | 295 | @abstractmethod 296 | def audio_challenge_is_visible(self) -> bool: 297 | """ 298 | Check if the reCAPTCHA audio challenge is visible. 299 | 300 | Returns 301 | ------- 302 | bool 303 | True if the reCAPTCHA audio challenge is visible, False otherwise. 304 | """ 305 | 306 | @abstractmethod 307 | def any_challenge_is_visible(self) -> bool: 308 | """ 309 | Check if any reCAPTCHA challenge is visible. 310 | 311 | Returns 312 | ------- 313 | bool 314 | True if any reCAPTCHA challenge is visible, False otherwise. 315 | """ 316 | 317 | @abstractmethod 318 | def try_again_is_visible(self) -> bool: 319 | """ 320 | Check if the reCAPTCHA try again message is visible. 321 | 322 | Returns 323 | ------- 324 | bool 325 | True if the reCAPTCHA try again message is visible, False otherwise. 326 | """ 327 | 328 | @abstractmethod 329 | def check_new_images_is_visible(self) -> bool: 330 | """ 331 | Check if the reCAPTCHA check new images message is visible. 332 | 333 | Returns 334 | ------- 335 | bool 336 | True if the reCAPTCHA check new images message is visible, False otherwise. 337 | """ 338 | 339 | @abstractmethod 340 | def select_all_matching_is_visible(self) -> bool: 341 | """ 342 | Check if the reCAPTCHA select all matching images message is visible. 343 | 344 | Returns 345 | ------- 346 | bool 347 | True if the reCAPTCHA select all matching images message is visible, 348 | False otherwise. 349 | """ 350 | 351 | @abstractmethod 352 | def challenge_is_solved(self) -> bool: 353 | """ 354 | Check if the reCAPTCHA challenge has been solved. 355 | 356 | Returns 357 | ------- 358 | bool 359 | True if the reCAPTCHA challenge has been solved, False otherwise. 360 | """ 361 | 362 | 363 | class SyncRecaptchaBox(RecaptchaBox[SyncFrame]): 364 | """ 365 | The synchronous class for reCAPTCHA v2 boxes. 366 | 367 | Parameters 368 | ---------- 369 | anchor_frame : SyncFrame 370 | The reCAPTCHA anchor frame. 371 | bframe_frame : SyncFrame 372 | The reCAPTCHA bframe frame. 373 | """ 374 | 375 | def _check_if_attached(func=None, /): 376 | """ 377 | A decorator for checking if the reCAPTCHA frames are attached 378 | before running the decorated function. 379 | """ 380 | 381 | def wrap(func): 382 | @wraps(func) 383 | def wrapper(self: SyncRecaptchaBox, *args, **kwargs) -> bool: 384 | if self.frames_are_detached(): 385 | return False 386 | 387 | return func(self, *args, **kwargs) 388 | 389 | return wrapper 390 | 391 | if func is None: 392 | return wrap 393 | 394 | return wrap(func) 395 | 396 | @classmethod 397 | def from_frames(cls, frames: Iterable[SyncFrame]) -> SyncRecaptchaBox: 398 | """ 399 | Create a reCAPTCHA box using a list of frames. 400 | 401 | Parameters 402 | ---------- 403 | frames : Iterable[SyncFrame] 404 | A list of frames to search for the reCAPTCHA frames. 405 | 406 | Returns 407 | ------- 408 | SyncRecaptchaBox 409 | The reCAPTCHA box. 410 | 411 | Raises 412 | ------ 413 | RecaptchaNotFoundError 414 | If the reCAPTCHA frames were not found 415 | or if no unchecked reCAPTCHA boxes were found. 416 | """ 417 | frame_pairs = cls._get_recaptcha_frame_pairs(frames) 418 | 419 | for anchor_frame, bframe_frame in frame_pairs: 420 | recaptcha_box = cls(anchor_frame, bframe_frame) 421 | 422 | if ( 423 | recaptcha_box.frames_are_attached() 424 | and recaptcha_box.checkbox.is_visible() 425 | and not recaptcha_box.checkbox.is_checked() 426 | or recaptcha_box.audio_challenge_button.is_visible() 427 | and recaptcha_box.audio_challenge_button.is_enabled() 428 | or recaptcha_box.image_challenge_button.is_visible() 429 | and recaptcha_box.image_challenge_button.is_enabled() 430 | ): 431 | return recaptcha_box 432 | 433 | raise RecaptchaNotFoundError("No unchecked reCAPTCHA boxes were found.") 434 | 435 | @property 436 | def anchor_frame(self) -> SyncFrame: 437 | """The reCAPTCHA anchor frame.""" 438 | return self._anchor_frame 439 | 440 | @property 441 | def bframe_frame(self) -> SyncFrame: 442 | """The reCAPTCHA bframe frame.""" 443 | return self._bframe_frame 444 | 445 | @_check_if_attached 446 | def rate_limit_is_visible(self) -> bool: 447 | """ 448 | Check if the reCAPTCHA rate limit message is visible. 449 | 450 | Returns 451 | ------- 452 | bool 453 | True if the reCAPTCHA rate limit message is visible, False otherwise. 454 | """ 455 | return self.bframe_frame.get_by_text( 456 | self._get_translations_pattern(ELEMENT_TRANSLATIONS["try_again_later"]) 457 | ).is_visible() 458 | 459 | @_check_if_attached 460 | def solve_failure_is_visible(self) -> bool: 461 | """ 462 | Check if the reCAPTCHA solve failure message is visible. 463 | 464 | Returns 465 | ------- 466 | bool 467 | True if the reCAPTCHA solve failure message is visible, False otherwise. 468 | """ 469 | return self.bframe_frame.get_by_text( 470 | self._get_translations_pattern( 471 | ELEMENT_TRANSLATIONS["multiple_correct_solutions_required"] 472 | ) 473 | ).is_visible() 474 | 475 | @_check_if_attached 476 | def image_challenge_is_visible(self) -> bool: 477 | """ 478 | Check if the reCAPTCHA image challenge is visible. 479 | 480 | Returns 481 | ------- 482 | bool 483 | True if the reCAPTCHA challenge is visible, False otherwise. 484 | """ 485 | button = self.skip_button.or_(self.next_button).or_(self.verify_button) 486 | return button.is_enabled() 487 | 488 | @_check_if_attached 489 | def audio_challenge_is_visible(self) -> bool: 490 | """ 491 | Check if the reCAPTCHA audio challenge is visible. 492 | 493 | Returns 494 | ------- 495 | bool 496 | True if the reCAPTCHA audio challenge is visible, False otherwise. 497 | """ 498 | return ( 499 | self.bframe_frame.get_by_text( 500 | self._get_translations_pattern( 501 | ELEMENT_TRANSLATIONS["press_play_to_listen"] 502 | ) 503 | ).is_visible() 504 | and self.new_challenge_button.is_visible() 505 | and self.new_challenge_button.is_enabled() 506 | ) 507 | 508 | @_check_if_attached 509 | def any_challenge_is_visible(self) -> bool: 510 | """ 511 | Check if any reCAPTCHA challenge is visible. 512 | 513 | Returns 514 | ------- 515 | bool 516 | True if any reCAPTCHA challenge is visible, False otherwise. 517 | """ 518 | return self.image_challenge_is_visible() or self.audio_challenge_is_visible() 519 | 520 | @_check_if_attached 521 | def try_again_is_visible(self) -> bool: 522 | """ 523 | Check if the reCAPTCHA try again message is visible. 524 | 525 | Returns 526 | ------- 527 | bool 528 | True if the reCAPTCHA try again message is visible, False otherwise. 529 | """ 530 | return self.bframe_frame.get_by_text( 531 | self._get_translations_pattern(ELEMENT_TRANSLATIONS["please_try_again"]) 532 | ).is_visible() 533 | 534 | @_check_if_attached 535 | def check_new_images_is_visible(self) -> bool: 536 | """ 537 | Check if the reCAPTCHA check new images message is visible. 538 | 539 | Returns 540 | ------- 541 | bool 542 | True if the reCAPTCHA check new images message is visible, False otherwise. 543 | """ 544 | return self.bframe_frame.get_by_text( 545 | self._get_translations_pattern( 546 | ELEMENT_TRANSLATIONS["please_also_check_the_new_images"] 547 | ) 548 | ).is_visible() 549 | 550 | @_check_if_attached 551 | def select_all_matching_is_visible(self) -> bool: 552 | """ 553 | Check if the reCAPTCHA select all matching images message is visible. 554 | 555 | Returns 556 | ------- 557 | bool 558 | True if the reCAPTCHA select all matching images message is visible, 559 | False otherwise. 560 | """ 561 | return self.bframe_frame.get_by_text( 562 | self._get_translations_pattern( 563 | ELEMENT_TRANSLATIONS["please_select_all_matching_images"] 564 | ) 565 | ).is_visible() 566 | 567 | @_check_if_attached 568 | def challenge_is_solved(self) -> bool: 569 | """ 570 | Check if the reCAPTCHA challenge has been solved. 571 | 572 | Returns 573 | ------- 574 | bool 575 | True if the reCAPTCHA challenge has been solved, False otherwise. 576 | """ 577 | return self.checkbox.is_visible() and self.checkbox.is_checked() 578 | 579 | 580 | class AsyncRecaptchaBox(RecaptchaBox[AsyncFrame]): 581 | """ 582 | The asynchronous class for reCAPTCHA v2 boxes. 583 | 584 | Parameters 585 | ---------- 586 | anchor_frame : AsyncFrame 587 | The reCAPTCHA anchor frame. 588 | bframe_frame : AsyncFrame 589 | The reCAPTCHA bframe frame. 590 | """ 591 | 592 | def _check_if_attached(func=None, /): 593 | """ 594 | A decorator for checking if the reCAPTCHA frames are attached 595 | before running the decorated function. 596 | """ 597 | 598 | def wrap(func): 599 | @wraps(func) 600 | async def wrapper(self: AsyncRecaptchaBox, *args, **kwargs) -> bool: 601 | if self.frames_are_detached(): 602 | return False 603 | 604 | return await func(self, *args, **kwargs) 605 | 606 | return wrapper 607 | 608 | if func is None: 609 | return wrap 610 | 611 | return wrap(func) 612 | 613 | @classmethod 614 | async def from_frames(cls, frames: Iterable[AsyncFrame]) -> AsyncRecaptchaBox: 615 | """ 616 | Create a reCAPTCHA box using a list of frames. 617 | 618 | Parameters 619 | ---------- 620 | frames : Iterable[AsyncFrame] 621 | A list of frames to search for the reCAPTCHA frames. 622 | 623 | Returns 624 | ------- 625 | AsyncRecaptchaBox 626 | The reCAPTCHA box. 627 | 628 | Raises 629 | ------ 630 | RecaptchaNotFoundError 631 | If the reCAPTCHA frames were not found 632 | or if no unchecked reCAPTCHA boxes were found. 633 | """ 634 | frame_pairs = cls._get_recaptcha_frame_pairs(frames) 635 | 636 | for anchor_frame, bframe_frame in frame_pairs: 637 | recaptcha_box = cls(anchor_frame, bframe_frame) 638 | 639 | if ( 640 | recaptcha_box.frames_are_attached() 641 | and await recaptcha_box.checkbox.is_visible() 642 | and not await recaptcha_box.checkbox.is_checked() 643 | or await recaptcha_box.audio_challenge_button.is_visible() 644 | and await recaptcha_box.audio_challenge_button.is_enabled() 645 | or await recaptcha_box.image_challenge_button.is_visible() 646 | and await recaptcha_box.image_challenge_button.is_enabled() 647 | ): 648 | return recaptcha_box 649 | 650 | raise RecaptchaNotFoundError("No unchecked reCAPTCHA boxes were found.") 651 | 652 | @property 653 | def anchor_frame(self) -> AsyncFrame: 654 | """The reCAPTCHA anchor frame.""" 655 | return self._anchor_frame 656 | 657 | @property 658 | def bframe_frame(self) -> AsyncFrame: 659 | """The reCAPTCHA bframe frame.""" 660 | return self._bframe_frame 661 | 662 | @_check_if_attached 663 | async def rate_limit_is_visible(self) -> bool: 664 | """ 665 | Check if the reCAPTCHA rate limit message is visible. 666 | 667 | Returns 668 | ------- 669 | bool 670 | True if the reCAPTCHA rate limit message is visible, False otherwise. 671 | """ 672 | return await self.bframe_frame.get_by_text( 673 | self._get_translations_pattern(ELEMENT_TRANSLATIONS["try_again_later"]) 674 | ).is_visible() 675 | 676 | @_check_if_attached 677 | async def solve_failure_is_visible(self) -> bool: 678 | """ 679 | Check if the reCAPTCHA solve failure message is visible. 680 | 681 | Returns 682 | ------- 683 | bool 684 | True if the reCAPTCHA solve failure message is visible, False otherwise. 685 | """ 686 | return await self.bframe_frame.get_by_text( 687 | self._get_translations_pattern( 688 | ELEMENT_TRANSLATIONS["multiple_correct_solutions_required"] 689 | ) 690 | ).is_visible() 691 | 692 | @_check_if_attached 693 | async def image_challenge_is_visible(self) -> bool: 694 | """ 695 | Check if the reCAPTCHA image challenge is visible. 696 | 697 | Returns 698 | ------- 699 | bool 700 | True if the reCAPTCHA challenge is visible, False otherwise. 701 | """ 702 | button = self.skip_button.or_(self.next_button).or_(self.verify_button) 703 | return await button.is_enabled() 704 | 705 | @_check_if_attached 706 | async def audio_challenge_is_visible(self) -> bool: 707 | """ 708 | Check if the reCAPTCHA audio challenge is visible. 709 | 710 | Returns 711 | ------- 712 | bool 713 | True if the reCAPTCHA audio challenge is visible, False otherwise. 714 | """ 715 | return ( 716 | await self.bframe_frame.get_by_text( 717 | self._get_translations_pattern( 718 | ELEMENT_TRANSLATIONS["press_play_to_listen"] 719 | ) 720 | ).is_visible() 721 | and await self.new_challenge_button.is_visible() 722 | and await self.new_challenge_button.is_enabled() 723 | ) 724 | 725 | @_check_if_attached 726 | async def any_challenge_is_visible(self) -> bool: 727 | """ 728 | Check if any reCAPTCHA challenge is visible. 729 | 730 | Returns 731 | ------- 732 | bool 733 | True if any reCAPTCHA challenge is visible, False otherwise. 734 | """ 735 | return ( 736 | await self.image_challenge_is_visible() 737 | or await self.audio_challenge_is_visible() 738 | ) 739 | 740 | @_check_if_attached 741 | async def try_again_is_visible(self) -> bool: 742 | """ 743 | Check if the reCAPTCHA try again message is visible. 744 | 745 | Returns 746 | ------- 747 | bool 748 | True if the reCAPTCHA try again message is visible, False otherwise. 749 | """ 750 | return await self.bframe_frame.get_by_text( 751 | self._get_translations_pattern(ELEMENT_TRANSLATIONS["please_try_again"]) 752 | ).is_visible() 753 | 754 | @_check_if_attached 755 | async def check_new_images_is_visible(self) -> bool: 756 | """ 757 | Check if the reCAPTCHA check new images message is visible. 758 | 759 | Returns 760 | ------- 761 | bool 762 | True if the reCAPTCHA check new images message is visible, False otherwise. 763 | """ 764 | return await self.bframe_frame.get_by_text( 765 | self._get_translations_pattern( 766 | ELEMENT_TRANSLATIONS["please_also_check_the_new_images"] 767 | ) 768 | ).is_visible() 769 | 770 | @_check_if_attached 771 | async def select_all_matching_is_visible(self) -> bool: 772 | """ 773 | Check if the reCAPTCHA select all matching images message is visible. 774 | 775 | Returns 776 | ------- 777 | bool 778 | True if the reCAPTCHA select all matching images message is visible, 779 | False otherwise. 780 | """ 781 | return await self.bframe_frame.get_by_text( 782 | self._get_translations_pattern( 783 | ELEMENT_TRANSLATIONS["please_select_all_matching_images"] 784 | ) 785 | ).is_visible() 786 | 787 | @_check_if_attached 788 | async def challenge_is_solved(self) -> bool: 789 | """ 790 | Check if the reCAPTCHA challenge has been solved. 791 | 792 | Returns 793 | ------- 794 | bool 795 | True if the reCAPTCHA challenge has been solved, False otherwise. 796 | """ 797 | return await self.checkbox.is_visible() and await self.checkbox.is_checked() 798 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/sync_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import base64 4 | import re 5 | import time 6 | from datetime import datetime 7 | from io import BytesIO 8 | from json import JSONDecodeError 9 | from typing import Any, Dict, List, Optional 10 | from urllib.parse import parse_qs, urlparse 11 | 12 | import speech_recognition 13 | from playwright.sync_api import Locator, Page, Response 14 | from pydub import AudioSegment 15 | from pydub.exceptions import CouldntDecodeError 16 | from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed 17 | 18 | from ..errors import ( 19 | CapSolverError, 20 | RecaptchaNotFoundError, 21 | RecaptchaRateLimitError, 22 | RecaptchaSolveError, 23 | ) 24 | from .base_solver import BaseSolver 25 | from .recaptcha_box import SyncRecaptchaBox 26 | from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO 27 | 28 | 29 | class SyncSolver(BaseSolver[Page]): 30 | """ 31 | A class for solving reCAPTCHA v2 synchronously with Playwright. 32 | 33 | Parameters 34 | ---------- 35 | page : Page 36 | The Playwright page to solve the reCAPTCHA on. 37 | attempts : int, optional 38 | The number of solve attempts, by default 5. 39 | capsolver_api_key : Optional[str], optional 40 | The CapSolver API key, by default None. 41 | If None, the `CAPSOLVER_API_KEY` environment variable will be used. 42 | """ 43 | 44 | def __enter__(self) -> SyncSolver: 45 | return self 46 | 47 | def __exit__(self, *_: Any) -> None: 48 | self.close() 49 | 50 | @staticmethod 51 | def _get_task_object(recaptcha_box: SyncRecaptchaBox) -> Optional[str]: 52 | """ 53 | Get the ID of the object in the reCAPTCHA image challenge task. 54 | 55 | Parameters 56 | ---------- 57 | recaptcha_box : SyncRecaptchaBox 58 | The reCAPTCHA box. 59 | 60 | Returns 61 | ------- 62 | Optional[str] 63 | The object ID. Returns None if the task object is not recognized. 64 | """ 65 | object_dict = { 66 | "/m/0pg52": OBJECT_TRANSLATIONS["taxis"], 67 | "/m/01bjv": OBJECT_TRANSLATIONS["bus"], 68 | "/m/04_sv": OBJECT_TRANSLATIONS["motorcycles"], 69 | "/m/013xlm": OBJECT_TRANSLATIONS["tractors"], 70 | "/m/01jk_4": OBJECT_TRANSLATIONS["chimneys"], 71 | "/m/014xcs": OBJECT_TRANSLATIONS["crosswalks"], 72 | "/m/015qff": OBJECT_TRANSLATIONS["traffic_lights"], 73 | "/m/0199g": OBJECT_TRANSLATIONS["bicycles"], 74 | "/m/015qbp": OBJECT_TRANSLATIONS["parking_meters"], 75 | "/m/0k4j": OBJECT_TRANSLATIONS["cars"], 76 | "/m/015kr": OBJECT_TRANSLATIONS["bridges"], 77 | "/m/019jd": OBJECT_TRANSLATIONS["boats"], 78 | "/m/0cdl1": OBJECT_TRANSLATIONS["palm_trees"], 79 | "/m/09d_r": OBJECT_TRANSLATIONS["mountains_or_hills"], 80 | "/m/01pns0": OBJECT_TRANSLATIONS["fire_hydrant"], 81 | "/m/01lynh": OBJECT_TRANSLATIONS["stairs"], 82 | } 83 | 84 | task = recaptcha_box.bframe_frame.locator("div").all_inner_texts() 85 | object_ = task[0].split("\n")[1] 86 | 87 | for object_id, translations in object_dict.items(): 88 | if object_ in translations: 89 | return object_id 90 | 91 | return None 92 | 93 | def _response_callback(self, response: Response) -> None: 94 | """ 95 | The callback for intercepting payload and userverify responses. 96 | 97 | Parameters 98 | ---------- 99 | response : Response 100 | The response. 101 | """ 102 | if ( 103 | re.search("/recaptcha/(api2|enterprise)/payload", response.url) is not None 104 | and self._payload_response is None 105 | ): 106 | self._payload_response = response 107 | elif ( 108 | re.search("/recaptcha/(api2|enterprise)/userverify", response.url) 109 | is not None 110 | ): 111 | token_match = re.search('"uvresp","(.*?)"', response.text()) 112 | 113 | if token_match is not None: 114 | self._token = token_match.group(1) 115 | 116 | def _get_capsolver_response( 117 | self, recaptcha_box: SyncRecaptchaBox, image_data: bytes 118 | ) -> Optional[Dict[str, Any]]: 119 | """ 120 | Get the CapSolver JSON response for an image. 121 | 122 | Parameters 123 | ---------- 124 | recaptcha_box : SyncRecaptchaBox 125 | The reCAPTCHA box. 126 | image_data : bytes 127 | The image data. 128 | 129 | Returns 130 | ------- 131 | Optional[Dict[str, Any]] 132 | The CapSolver JSON response. 133 | Returns None if the task object is not recognized. 134 | 135 | Raises 136 | ------ 137 | CapSolverError 138 | If the CapSolver API returned an error. 139 | """ 140 | image = base64.b64encode(image_data).decode("utf-8") 141 | task_object = self._get_task_object(recaptcha_box) 142 | 143 | if task_object is None: 144 | return None 145 | 146 | payload = { 147 | "clientKey": self._capsolver_api_key, 148 | "task": { 149 | "type": "ReCaptchaV2Classification", 150 | "image": image, 151 | "question": task_object, 152 | }, 153 | } 154 | 155 | response = self._page.request.post( 156 | "https://api.capsolver.com/createTask", data=payload 157 | ) 158 | 159 | try: 160 | response_json = response.json() 161 | except JSONDecodeError as err: 162 | raise CapSolverError from err 163 | 164 | if response_json["errorId"] != 0: 165 | raise CapSolverError(response_json["errorDescription"]) 166 | 167 | return response_json 168 | 169 | def _solve_tiles(self, recaptcha_box: SyncRecaptchaBox, indexes: List[int]) -> None: 170 | """ 171 | Solve the tiles in the reCAPTCHA image challenge. 172 | 173 | Parameters 174 | ---------- 175 | recaptcha_box : SyncRecaptchaBox 176 | The reCAPTCHA box. 177 | indexes : List[int] 178 | The indexes of the tiles that contain the task object. 179 | 180 | Raises 181 | ------ 182 | CapSolverError 183 | If the CapSolver API returned an error. 184 | """ 185 | changing_tiles: Dict[Locator, str] = {} 186 | indexes = indexes.copy() 187 | 188 | style_script = """ 189 | (element) => { 190 | element.style = ""; 191 | element.className = "rc-imageselect-tile"; 192 | } 193 | """ 194 | 195 | for index in indexes: 196 | tile = recaptcha_box.tile_selector.nth(index) 197 | tile.click() 198 | 199 | if "rc-imageselect-dynamic-selected" not in tile.get_attribute("class"): 200 | continue 201 | 202 | changing_tiles[tile] = tile.locator("img").get_attribute("src") 203 | tile.evaluate(style_script) 204 | 205 | start_time = datetime.now() 206 | 207 | while changing_tiles and (datetime.now() - start_time).seconds < 60: 208 | for tile in changing_tiles.copy(): 209 | image_url = tile.locator("img").get_attribute("src") 210 | 211 | if changing_tiles[tile] == image_url: 212 | continue 213 | 214 | changing_tiles[tile] = image_url 215 | response = self._page.request.get(image_url) 216 | 217 | capsolver_response = self._get_capsolver_response( 218 | recaptcha_box, response.body() 219 | ) 220 | 221 | if ( 222 | capsolver_response is None 223 | or not capsolver_response["solution"]["hasObject"] 224 | ): 225 | changing_tiles.pop(tile) 226 | continue 227 | 228 | tile.click() 229 | tile.evaluate(style_script) 230 | 231 | def _transcribe_audio( 232 | self, audio_url: str, *, language: str = "en-US" 233 | ) -> Optional[str]: 234 | """ 235 | Transcribe the reCAPTCHA audio challenge. 236 | 237 | Parameters 238 | ---------- 239 | audio_url : str 240 | The reCAPTCHA audio URL. 241 | language : str, optional 242 | The language of the audio, by default en-US. 243 | 244 | Returns 245 | ------- 246 | Optional[str] 247 | The reCAPTCHA audio text. 248 | Returns None if the audio could not be converted. 249 | """ 250 | response = self._page.request.get(audio_url) 251 | 252 | wav_audio = BytesIO() 253 | mp3_audio = BytesIO(response.body()) 254 | 255 | try: 256 | audio: AudioSegment = AudioSegment.from_mp3(mp3_audio) 257 | except CouldntDecodeError: 258 | return None 259 | 260 | audio.export(wav_audio, format="wav") 261 | recognizer = speech_recognition.Recognizer() 262 | 263 | with speech_recognition.AudioFile(wav_audio) as source: 264 | audio_data = recognizer.record(source) 265 | 266 | try: 267 | return recognizer.recognize_google(audio_data, language=language) 268 | except speech_recognition.UnknownValueError: 269 | return None 270 | 271 | def _click_checkbox(self, recaptcha_box: SyncRecaptchaBox) -> None: 272 | """ 273 | Click the reCAPTCHA checkbox. 274 | 275 | Parameters 276 | ---------- 277 | recaptcha_box : SyncRecaptchaBox 278 | The reCAPTCHA box. 279 | 280 | Raises 281 | ------ 282 | RecaptchaRateLimitError 283 | If the reCAPTCHA rate limit has been exceeded. 284 | """ 285 | recaptcha_box.checkbox.click() 286 | 287 | while recaptcha_box.frames_are_attached() and self._token is None: 288 | if recaptcha_box.rate_limit_is_visible(): 289 | raise RecaptchaRateLimitError 290 | 291 | if recaptcha_box.any_challenge_is_visible(): 292 | return 293 | 294 | self._page.wait_for_timeout(250) 295 | 296 | def _get_audio_url(self, recaptcha_box: SyncRecaptchaBox) -> str: 297 | """ 298 | Get the reCAPTCHA audio URL. 299 | 300 | Parameters 301 | ---------- 302 | recaptcha_box : SyncRecaptchaBox 303 | The reCAPTCHA box. 304 | 305 | Returns 306 | ------- 307 | str 308 | The reCAPTCHA audio URL. 309 | 310 | Raises 311 | ------ 312 | RecaptchaRateLimitError 313 | If the reCAPTCHA rate limit has been exceeded. 314 | """ 315 | while True: 316 | if recaptcha_box.rate_limit_is_visible(): 317 | raise RecaptchaRateLimitError 318 | 319 | if recaptcha_box.audio_challenge_is_visible(): 320 | return recaptcha_box.audio_download_button.get_attribute("href") 321 | 322 | self._page.wait_for_timeout(250) 323 | 324 | def _submit_audio_text(self, recaptcha_box: SyncRecaptchaBox, text: str) -> None: 325 | """ 326 | Submit the reCAPTCHA audio text. 327 | 328 | Parameters 329 | ---------- 330 | recaptcha_box : SyncRecaptchaBox 331 | The reCAPTCHA box. 332 | text : str 333 | The reCAPTCHA audio text. 334 | 335 | Raises 336 | ------ 337 | RecaptchaRateLimitError 338 | If the reCAPTCHA rate limit has been exceeded. 339 | """ 340 | recaptcha_box.audio_challenge_textbox.fill(text) 341 | 342 | with self._page.expect_response( 343 | re.compile("/recaptcha/(api2|enterprise)/userverify") 344 | ): 345 | recaptcha_box.verify_button.click() 346 | 347 | while recaptcha_box.frames_are_attached(): 348 | if recaptcha_box.rate_limit_is_visible(): 349 | raise RecaptchaRateLimitError 350 | 351 | if ( 352 | not recaptcha_box.audio_challenge_is_visible() 353 | or recaptcha_box.solve_failure_is_visible() 354 | or recaptcha_box.challenge_is_solved() 355 | ): 356 | return 357 | 358 | self._page.wait_for_timeout(250) 359 | 360 | def _submit_tile_answers(self, recaptcha_box: SyncRecaptchaBox) -> None: 361 | """ 362 | Submit the reCAPTCHA image challenge tile answers. 363 | 364 | Parameters 365 | ---------- 366 | recaptcha_box : SyncRecaptchaBox 367 | The reCAPTCHA box. 368 | 369 | Raises 370 | ------ 371 | RecaptchaRateLimitError 372 | If the reCAPTCHA rate limit has been exceeded. 373 | """ 374 | recaptcha_box.verify_button.click() 375 | 376 | while recaptcha_box.frames_are_attached(): 377 | if recaptcha_box.rate_limit_is_visible(): 378 | raise RecaptchaRateLimitError 379 | 380 | if ( 381 | recaptcha_box.challenge_is_solved() 382 | or recaptcha_box.try_again_is_visible() 383 | ): 384 | return 385 | 386 | if ( 387 | recaptcha_box.check_new_images_is_visible() 388 | or recaptcha_box.select_all_matching_is_visible() 389 | ): 390 | with self._page.expect_response( 391 | re.compile("/recaptcha/(api2|enterprise)/payload") 392 | ): 393 | recaptcha_box.new_challenge_button.click() 394 | 395 | return 396 | 397 | self._page.wait_for_timeout(250) 398 | 399 | def _solve_image_challenge(self, recaptcha_box: SyncRecaptchaBox) -> None: 400 | """ 401 | Solve the reCAPTCHA image challenge. 402 | 403 | Parameters 404 | ---------- 405 | recaptcha_box : SyncRecaptchaBox 406 | The reCAPTCHA box. 407 | 408 | Raises 409 | ------ 410 | CapSolverError 411 | If the CapSolver API returned an error. 412 | RecaptchaRateLimitError 413 | If the reCAPTCHA rate limit has been exceeded. 414 | """ 415 | while recaptcha_box.frames_are_attached(): 416 | capsolver_response = self._get_capsolver_response( 417 | recaptcha_box, self._payload_response.body() 418 | ) 419 | 420 | if ( 421 | capsolver_response is None 422 | or not capsolver_response["solution"]["objects"] 423 | ): 424 | self._payload_response = None 425 | 426 | with self._page.expect_response( 427 | re.compile("/recaptcha/(api2|enterprise)/reload") 428 | ): 429 | recaptcha_box.new_challenge_button.click() 430 | 431 | while self._payload_response is None: 432 | if recaptcha_box.rate_limit_is_visible(): 433 | raise RecaptchaRateLimitError 434 | 435 | self._page.wait_for_timeout(250) 436 | 437 | continue 438 | 439 | self._solve_tiles(recaptcha_box, capsolver_response["solution"]["objects"]) 440 | self._payload_response = None 441 | 442 | button = recaptcha_box.skip_button.or_(recaptcha_box.next_button) 443 | 444 | if button.is_hidden(): 445 | self._submit_tile_answers(recaptcha_box) 446 | return 447 | 448 | with self._page.expect_response( 449 | re.compile("/recaptcha/(api2|enterprise)/payload") 450 | ): 451 | button.click() 452 | 453 | def _solve_audio_challenge(self, recaptcha_box: SyncRecaptchaBox) -> None: 454 | """ 455 | Solve the reCAPTCHA audio challenge. 456 | 457 | Parameters 458 | ---------- 459 | recaptcha_box : SyncRecaptchaBox 460 | The reCAPTCHA box. 461 | 462 | Raises 463 | ------ 464 | RecaptchaRateLimitError 465 | If the reCAPTCHA rate limit has been exceeded. 466 | """ 467 | parsed_url = urlparse(recaptcha_box.anchor_frame.url) 468 | query_params = parse_qs(parsed_url.query) 469 | language = query_params["hl"][0] 470 | 471 | if language not in ORIGINAL_LANGUAGE_AUDIO: 472 | language = "en-US" 473 | 474 | while True: 475 | url = self._get_audio_url(recaptcha_box) 476 | text = self._transcribe_audio(url, language=language) 477 | 478 | if text is not None: 479 | break 480 | 481 | with self._page.expect_response( 482 | re.compile("/recaptcha/(api2|enterprise)/reload") 483 | ): 484 | recaptcha_box.new_challenge_button.click() 485 | 486 | while url == self._get_audio_url(recaptcha_box): 487 | self._page.wait_for_timeout(250) 488 | 489 | self._submit_audio_text(recaptcha_box, text) 490 | 491 | def recaptcha_is_visible(self) -> bool: 492 | """ 493 | Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 494 | 495 | Returns 496 | ------- 497 | bool 498 | Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible. 499 | """ 500 | try: 501 | SyncRecaptchaBox.from_frames(self._page.frames) 502 | except RecaptchaNotFoundError: 503 | return False 504 | 505 | return True 506 | 507 | def solve_recaptcha( 508 | self, 509 | *, 510 | attempts: Optional[int] = None, 511 | wait: bool = False, 512 | wait_timeout: float = 30, 513 | image_challenge: bool = False, 514 | ) -> str: 515 | """ 516 | Solve the reCAPTCHA and return the `g-recaptcha-response` token. 517 | 518 | Parameters 519 | ---------- 520 | attempts : Optional[int], optional 521 | The number of solve attempts, by default 5. 522 | wait : bool, optional 523 | Whether to wait for the reCAPTCHA to appear, by default False. 524 | wait_timeout : float, optional 525 | The amount of time in seconds to wait for the reCAPTCHA to appear, 526 | by default 30. Only used if `wait` is True. 527 | image_challenge : bool, optional 528 | Whether to solve the image challenge, by default False. 529 | 530 | Returns 531 | ------- 532 | str 533 | The `g-recaptcha-response` token. 534 | 535 | Raises 536 | ------ 537 | CapSolverError 538 | If the CapSolver API returned an error. 539 | RecaptchaNotFoundError 540 | If the reCAPTCHA was not found. 541 | RecaptchaRateLimitError 542 | If the reCAPTCHA rate limit has been exceeded. 543 | RecaptchaSolveError 544 | If the reCAPTCHA could not be solved. 545 | """ 546 | if image_challenge and self._capsolver_api_key is None: 547 | raise CapSolverError( 548 | "You must provide a CapSolver API key to solve image challenges." 549 | ) 550 | 551 | self._token = None 552 | attempts = attempts or self._attempts 553 | 554 | if wait: 555 | retry = Retrying( 556 | sleep=self._page.wait_for_timeout, 557 | stop=stop_after_delay(wait_timeout), 558 | wait=wait_fixed(0.25), 559 | retry=retry_if_exception_type(RecaptchaNotFoundError), 560 | reraise=True, 561 | ) 562 | 563 | recaptcha_box = retry( 564 | lambda: SyncRecaptchaBox.from_frames(self._page.frames) 565 | ) 566 | else: 567 | recaptcha_box = SyncRecaptchaBox.from_frames(self._page.frames) 568 | 569 | if recaptcha_box.rate_limit_is_visible(): 570 | raise RecaptchaRateLimitError 571 | 572 | if recaptcha_box.checkbox.is_visible(): 573 | click_timestamp = time.time() 574 | self._click_checkbox(recaptcha_box) 575 | 576 | if self._token is not None: 577 | return self._token 578 | 579 | if ( 580 | recaptcha_box.frames_are_detached() 581 | or not recaptcha_box.any_challenge_is_visible() 582 | or recaptcha_box.challenge_is_solved() 583 | ): 584 | while self._token is None: 585 | self._page.wait_for_timeout(250) 586 | 587 | return self._token 588 | 589 | time_to_wait = max(1 - (time.time() - click_timestamp), 0) 590 | self._page.wait_for_timeout(time_to_wait * 1000) 591 | 592 | while not recaptcha_box.any_challenge_is_visible(): 593 | self._page.wait_for_timeout(250) 594 | 595 | if image_challenge and recaptcha_box.image_challenge_button.is_visible(): 596 | recaptcha_box.image_challenge_button.click() 597 | elif not image_challenge and recaptcha_box.audio_challenge_button.is_visible(): 598 | recaptcha_box.audio_challenge_button.click() 599 | 600 | if image_challenge: 601 | image = recaptcha_box.image_challenge.locator("img").first 602 | image_url = image.get_attribute("src") 603 | self._payload_response = self._page.request.get(image_url) 604 | 605 | while attempts > 0: 606 | self._token = None 607 | 608 | if image_challenge: 609 | self._solve_image_challenge(recaptcha_box) 610 | else: 611 | self._solve_audio_challenge(recaptcha_box) 612 | 613 | if ( 614 | recaptcha_box.frames_are_detached() 615 | or not recaptcha_box.any_challenge_is_visible() 616 | or recaptcha_box.challenge_is_solved() 617 | ): 618 | while self._token is None: 619 | self._page.wait_for_timeout(250) 620 | 621 | return self._token 622 | 623 | attempts -= 1 624 | 625 | raise RecaptchaSolveError 626 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav2/translations.py: -------------------------------------------------------------------------------- 1 | ELEMENT_TRANSLATIONS = { 2 | "im_not_a_robot": ( 3 | "I'm not a robot", 4 | "Я не робот", 5 | "进行人机身份验证", 6 | "No soy un robot", 7 | "Je ne suis pas un robot", 8 | "Ich bin kein Roboter", 9 | "Ik ben geen robot", 10 | "Non sono un robot", 11 | "Não sou um robô", 12 | ), 13 | "get_an_audio_challenge": ( 14 | "Get an audio challenge", 15 | "Пройти аудиотест", 16 | "改用音频验证", 17 | "Obtener una pista sonora", 18 | "Générer un test audio", 19 | "Audio-Captcha abrufen", 20 | "Een audio-uitdaging ophalen", 21 | "Verifica audio", 22 | "Receber um desafio de áudio", 23 | ), 24 | "get_a_visual_challenge": ( 25 | "Get a visual challenge", 26 | "Пройти визуальный тест", 27 | "改用图片验证", 28 | "Obtener una pista visual", 29 | "Générer un test visuel", 30 | "Visuelles Captcha abrufen", 31 | "Een visuele uitdaging ophalen", 32 | "Verifica visiva", 33 | "Receber um desafio visual", 34 | ), 35 | "get_a_new_challenge": ( 36 | "Get a new challenge", 37 | "Обновить", 38 | "换一个新的验证码", 39 | "Obtener una pista nueva", 40 | "Générer un nouveau test", 41 | "Neues Captcha abrufen", 42 | "Een nieuwe uitdaging ophalen", 43 | "Nuova verifica", 44 | "Receber outro desafio", 45 | ), 46 | "alternatively_download_audio_as_mp3": ( 47 | "Alternatively, download audio as MP3", 48 | "Скачать MP3-файл", 49 | "或者以 MP3 格式下载音频", 50 | "También puedes descargar el audio en formato MP3", 51 | "Ou téléchargez le fichier audio au format MP3", 52 | "Audio als MP3 herunterladen", 53 | "Of download het geluid als MP3-bestand", 54 | "In alternativa, scarica l'audio come MP3", 55 | "Como alternativa, faça o download do áudio como MP3", 56 | ), 57 | "enter_what_you_hear": ( 58 | "Enter what you hear", 59 | "Введите прозвучавшие слова", 60 | "请输入您听到的内容", 61 | "Escribe lo que escuches", 62 | "Saisissez ce que vous entendez", 63 | "Geben Sie ein, was Sie hören", 64 | "Geef op wat je hoort", 65 | "Inserisci quello che senti", 66 | "Digite o que você ouve", 67 | ), 68 | "skip": ( 69 | "Skip", 70 | "Пропустить", 71 | "跳过", 72 | "Saltar", 73 | "Ignorer", 74 | "Überspringen", 75 | "Overslaan", 76 | "Salta", 77 | "Pular", 78 | ), 79 | "next": ( 80 | "Next", 81 | "Далее", 82 | "下一个", 83 | "Siguiente", 84 | "Suivant", 85 | "Weiter", 86 | "Volgende", 87 | "Avanti", 88 | "Avançar", 89 | ), 90 | "verify": ( 91 | "Verify", 92 | "Подтвердить", 93 | "验证", 94 | "Verificar", 95 | "Valider", 96 | "Bestätigen", 97 | "Verifiëren", 98 | "Verifica", 99 | "Verificar", 100 | ), 101 | "try_again_later": ( 102 | "Try again later", 103 | "Повторите попытку позже", 104 | "稍后重试", 105 | "Inténtalo de nuevo más tarde", 106 | "Réessayez plus tard", 107 | "Später noch einmal versuchen", 108 | "Probeer het later opnieuw", 109 | "Riprova più tardi", 110 | "Tente novamente mais tarde", 111 | ), 112 | "multiple_correct_solutions_required": ( 113 | "Multiple correct solutions required - please solve more", 114 | "Вы должны выполнить несколько заданий", 115 | "需要提供多个正确答案 - 请回答更多问题", 116 | "Debes resolver más captchas", 117 | "Veuillez effectuer d'autres tests (vous devez fournir plusieurs solutions correctes)", 118 | "Es sind mehrere richtige Lösungen erforderlich. Bitte weitere Aufgaben lösen", 119 | "Er zijn meerdere juiste oplossingen vereist - geef meer oplossingen op", 120 | "È necessario fornire più soluzioni corrette. Risolvi altri captcha", 121 | "São necessárias várias soluções corretas. Solucione mais", 122 | ), 123 | "press_play_to_listen": ( 124 | "Press PLAY to listen", 125 | 'Чтобы прослушать, нажмите "Воспроизвести"', 126 | "按“播放”可听语音内容", 127 | "Pulsa REPRODUCIR para escuchar el audio", 128 | "Appuyez sur LECTURE pour écouter", 129 | "Wählen Sie WIEDERGABE aus, um die Wiedergabe zu starten", 130 | "Druk op AFSPELEN om te luisteren", 131 | "Premi RIPRODUCI per ascoltare", 132 | "Pressione REPRODUZIR para ouvir", 133 | ), 134 | "please_try_again": ( 135 | "Please try again", 136 | "Повторите попытку", 137 | "请重试", 138 | "Inténtalo de nuevo", 139 | "Veuillez réessayer", 140 | "Versuche es bitte erneut", 141 | "Probeer het opnieuw", 142 | "Riprova", 143 | "Tente novamente", 144 | ), 145 | "please_also_check_the_new_images": ( 146 | "Please also check the new images", 147 | "Просмотрите также новые изображение", 148 | "另外,您还需查看新显示的图片", 149 | "Comprueba también las imágenes nuevas", 150 | "Veuillez également vérifier les nouvelles images", 151 | "Sehen Sie sich auch die neuen Bilder an", 152 | "Controleer ook de nieuwe afbeeldingen", 153 | "Controlla anche le nuove immagini", 154 | "Verifique também as novas imagens", 155 | ), 156 | "please_select_all_matching_images": ( 157 | "Please select all matching images", 158 | "Выберите все совпадающие изображения", 159 | "请选择所有相符的图片", 160 | "Selecciona todas las imágenes que coincidan", 161 | "Veuillez sélectionner toutes les images correspondantes", 162 | "Wählen Sie alle passenden Bilder aus", 163 | "Selecteer alle overeenkomende afbeeldingen", 164 | "Seleziona tutte le immagini corrispondenti", 165 | "Selecione todas as imagens correspondentes", 166 | ), 167 | } 168 | 169 | OBJECT_TRANSLATIONS = { 170 | "taxis": ("taxis", "такси", "出租车", "Taxis", "taxi's", "taxi", "táxis"), 171 | "bus": ( 172 | "bus", 173 | "buses", 174 | "автобус", 175 | "автобусы", 176 | "公交车", 177 | "autobuses", 178 | "autobús", 179 | "Bus", 180 | "Bussen", 181 | "bussen", 182 | "autobus", 183 | "ônibus", 184 | ), 185 | "motorcycles": ( 186 | "motorcycles", 187 | "мотоциклы", 188 | "摩托车", 189 | "motocicletas", 190 | "motos", 191 | "Motorrädern", 192 | "motorfietsen", 193 | "motoren", 194 | "motocicli", 195 | ), 196 | "tractors": ( 197 | "tractors", 198 | "трактора", 199 | "拖拉机", 200 | "tractores", 201 | "tracteurs", 202 | "Traktoren", 203 | "tractoren", 204 | "trattori", 205 | "tratores", 206 | ), 207 | "chimneys": ( 208 | "chimneys", 209 | "дымовые трубы", 210 | "烟囱", 211 | "chimeneas", 212 | "cheminées", 213 | "Schornsteinen", 214 | "schoorstenen", 215 | "camini", 216 | "chaminés", 217 | ), 218 | "crosswalks": ( 219 | "crosswalks", 220 | "пешеходные переходы", 221 | "人行横道", 222 | "过街人行道", 223 | "pasos de peatones", 224 | "passages pour piétons", 225 | "Fußgängerüberwegen", 226 | "oversteekplaatsen", 227 | "zebrapaden", 228 | "strisce pedonali", 229 | "faixas de pedestres", 230 | "faixas de pedestre", 231 | ), 232 | "traffic_lights": ( 233 | "traffic lights", 234 | "светофоры", 235 | "红绿灯", 236 | "semáforos", 237 | "feux de circulation", 238 | "Ampeln", 239 | "verkeerslichten", 240 | "semafori", 241 | ), 242 | "bicycles": ( 243 | "bicycles", 244 | "велосипеды", 245 | "自行车", 246 | "bicicletas", 247 | "vélos", 248 | "Fahrrädern", 249 | "fietsen", 250 | "biciclette", 251 | ), 252 | "parking_meters": ( 253 | "parking meters", 254 | "парковочные автоматы", 255 | "停车计时器", 256 | "parquímetros", 257 | "parcmètres", 258 | "Parkometern", 259 | "parkeermeters", 260 | "parchimetri", 261 | ), 262 | "cars": ( 263 | "cars", 264 | "автомобили", 265 | "小轿车", 266 | "coches", 267 | "voitures", 268 | "Pkws", 269 | "auto's", 270 | "auto", 271 | "carros", 272 | ), 273 | "bridges": ( 274 | "bridges", 275 | "мостами", 276 | "桥", 277 | "puentes", 278 | "ponts", 279 | "Brücken", 280 | "bruggen", 281 | "ponti", 282 | "pontes", 283 | ), 284 | "boats": ("boats", "лодки", "船", "barcos", "bateaux", "Boote", "boten", "barche"), 285 | "palm_trees": ( 286 | "palm trees", 287 | "пальмы", 288 | "棕榈树", 289 | "palmeras", 290 | "palmiers", 291 | "Palmen", 292 | "palmbomen", 293 | "palme", 294 | "palmeiras", 295 | ), 296 | "mountains_or_hills": ( 297 | "mountains or hills", 298 | "mountain", 299 | "горы или холмы", 300 | "montañas o colinas", 301 | "montagnes ou collines", 302 | "Berge oder Hügel", 303 | "bergen of heuvels", 304 | "montagne o colline", 305 | "montanhas ou colinas", 306 | ), 307 | "fire_hydrant": ( 308 | "a fire hydrant", 309 | "fire hydrants", 310 | "гидрантами", 311 | "пожарные гидранты", 312 | "消防栓", 313 | "bocas de incendios", 314 | "una boca de incendios", 315 | "borne d'incendie", 316 | "bouches d'incendie", 317 | "Hydranten", 318 | "Feuerhydranten", 319 | "een brandkraan", 320 | "brandkranen", 321 | "idrante", 322 | "idranti", 323 | "um hidrante", 324 | "hidrantes", 325 | ), 326 | "stairs": ( 327 | "stairs", 328 | "лестницы", 329 | "楼梯", 330 | "escaleras", 331 | "escaliers", 332 | "Treppen(stufen)", 333 | "trappen", 334 | "scale", 335 | "escadas", 336 | ), 337 | } 338 | 339 | ORIGINAL_LANGUAGE_AUDIO = ("de", "es", "fr", "it", "nl", "pt") 340 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav3/__init__.py: -------------------------------------------------------------------------------- 1 | """reCAPTCHA v3 solver for Playwright.""" 2 | from .async_solver import AsyncSolver 3 | from .sync_solver import SyncSolver 4 | 5 | __all__ = ["AsyncSolver", "SyncSolver"] 6 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav3/async_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | import time 5 | from typing import Any, Optional 6 | 7 | from playwright.async_api import Page, Response 8 | 9 | from ..errors import RecaptchaTimeoutError 10 | from .base_solver import BaseSolver 11 | 12 | 13 | class AsyncSolver(BaseSolver[Page]): 14 | """ 15 | A class for solving reCAPTCHA v3 asynchronously with Playwright. 16 | 17 | Parameters 18 | ---------- 19 | page : Page 20 | The Playwright page to solve the reCAPTCHA on. 21 | timeout : float, optional 22 | The solve timeout in seconds, by default 30. 23 | """ 24 | 25 | async def __aenter__(self) -> AsyncSolver: 26 | return self 27 | 28 | async def __aexit__(self, *_: Any) -> None: 29 | self.close() 30 | 31 | async def _response_callback(self, response: Response) -> None: 32 | """ 33 | The callback for intercepting reload responses. 34 | 35 | Parameters 36 | ---------- 37 | response : Response 38 | The response. 39 | """ 40 | if re.search("/recaptcha/(api2|enterprise)/reload", response.url) is None: 41 | return 42 | 43 | token_match = re.search('"rresp","(.*?)"', await response.text()) 44 | 45 | if token_match is not None: 46 | self._token = token_match.group(1) 47 | 48 | async def solve_recaptcha(self, timeout: Optional[float] = None) -> str: 49 | """ 50 | Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token. 51 | 52 | Parameters 53 | ---------- 54 | timeout : Optional[float], optional 55 | The solve timeout in seconds, by default 30. 56 | 57 | Returns 58 | ------- 59 | str 60 | The `g-recaptcha-response` token. 61 | 62 | Raises 63 | ------ 64 | RecaptchaTimeoutError 65 | If the solve timeout has been exceeded. 66 | """ 67 | self._token = None 68 | timeout = timeout or self._timeout 69 | start_time = time.time() 70 | 71 | while self._token is None: 72 | if time.time() - start_time >= timeout: 73 | raise RecaptchaTimeoutError 74 | 75 | await self._page.wait_for_timeout(250) 76 | 77 | return self._token 78 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav3/base_solver.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, Optional, TypeVar, Union 3 | 4 | from playwright.async_api import Page as AsyncPage 5 | from playwright.async_api import Response as AsyncResponse 6 | from playwright.sync_api import Page as SyncPage 7 | from playwright.sync_api import Response as SyncResponse 8 | 9 | PageT = TypeVar("PageT", AsyncPage, SyncPage) 10 | Response = Union[AsyncResponse, SyncResponse] 11 | 12 | 13 | class BaseSolver(ABC, Generic[PageT]): 14 | """ 15 | The base class for reCAPTCHA v3 solvers. 16 | 17 | Parameters 18 | ---------- 19 | page : PageT 20 | The Playwright page to solve the reCAPTCHA on. 21 | timeout : float, optional 22 | The solve timeout in seconds, by default 30. 23 | """ 24 | 25 | def __init__(self, page: PageT, timeout: float = 30) -> None: 26 | self._page = page 27 | self._timeout = timeout 28 | 29 | self._token: Optional[str] = None 30 | self._page.on("response", self._response_callback) 31 | 32 | def __repr__(self) -> str: 33 | return ( 34 | f"{self.__class__.__name__}(page={self._page!r}, " 35 | f"timeout={self._timeout!r})" 36 | ) 37 | 38 | def close(self) -> None: 39 | """Remove the reload response listener.""" 40 | try: 41 | self._page.remove_listener("response", self._response_callback) 42 | except KeyError: 43 | pass 44 | 45 | @abstractmethod 46 | def _response_callback(self, response: Response) -> None: 47 | """ 48 | The callback for intercepting reload responses. 49 | 50 | Parameters 51 | ---------- 52 | response : Response 53 | The response. 54 | """ 55 | 56 | @abstractmethod 57 | def solve_recaptcha(self, timeout: Optional[float] = None) -> str: 58 | """ 59 | Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token. 60 | 61 | Parameters 62 | ---------- 63 | timeout : Optional[float], optional 64 | The solve timeout in seconds, by default 30. 65 | 66 | Returns 67 | ------- 68 | str 69 | The `g-recaptcha-response` token. 70 | 71 | Raises 72 | ------ 73 | RecaptchaTimeoutError 74 | If the solve timeout has been exceeded. 75 | """ 76 | -------------------------------------------------------------------------------- /playwright_recaptcha/recaptchav3/sync_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | import time 5 | from typing import Any, Optional 6 | 7 | from playwright.sync_api import Page, Response 8 | 9 | from ..errors import RecaptchaTimeoutError 10 | from .base_solver import BaseSolver 11 | 12 | 13 | class SyncSolver(BaseSolver[Page]): 14 | """ 15 | A class for solving reCAPTCHA v3 synchronously with Playwright. 16 | 17 | Parameters 18 | ---------- 19 | page : Page 20 | The Playwright page to solve the reCAPTCHA on. 21 | timeout : float, optional 22 | The solve timeout in seconds, by default 30. 23 | """ 24 | 25 | def __enter__(self) -> SyncSolver: 26 | return self 27 | 28 | def __exit__(self, *_: Any) -> None: 29 | self.close() 30 | 31 | def _response_callback(self, response: Response) -> None: 32 | """ 33 | The callback for intercepting reload responses. 34 | 35 | Parameters 36 | ---------- 37 | response : Response 38 | The response. 39 | """ 40 | if re.search("/recaptcha/(api2|enterprise)/reload", response.url) is None: 41 | return 42 | 43 | token_match = re.search('"rresp","(.*?)"', response.text()) 44 | 45 | if token_match is not None: 46 | self._token = token_match.group(1) 47 | 48 | def solve_recaptcha(self, timeout: Optional[float] = None) -> str: 49 | """ 50 | Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token. 51 | 52 | Parameters 53 | ---------- 54 | timeout : Optional[float], optional 55 | The solve timeout in seconds, by default 30. 56 | 57 | Returns 58 | ------- 59 | str 60 | The `g-recaptcha-response` token. 61 | 62 | Raises 63 | ------ 64 | RecaptchaTimeoutError 65 | If the solve timeout has been exceeded. 66 | """ 67 | self._token = None 68 | timeout = timeout or self._timeout 69 | start_time = time.time() 70 | 71 | while self._token is None: 72 | if time.time() - start_time >= timeout: 73 | raise RecaptchaTimeoutError 74 | 75 | self._page.wait_for_timeout(250) 76 | 77 | return self._token 78 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_default_fixture_loop_scope = function -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | playwright>=1.33.0,!=1.50.0 2 | pydub==0.25.1 3 | pytest-asyncio==0.24.0; python_version == "3.8" 4 | pytest-asyncio==0.26.0; python_version >= "3.9" 5 | setuptools==75.3.0; python_version == "3.8" 6 | setuptools==80.9.0; python_version >= "3.9" 7 | SpeechRecognition==3.10.4; python_version == "3.8" 8 | SpeechRecognition==3.14.3; python_version >= "3.9" 9 | tenacity==9.1.2 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", encoding="utf-8") as file: 4 | long_description = file.read() 5 | 6 | setup( 7 | name="playwright-recaptcha", 8 | version="0.5.1", 9 | author="Xewdy444", 10 | author_email="xewdy@xewdy.systems", 11 | description="A library for solving reCAPTCHA v2 and v3 with Playwright", 12 | license="MIT", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/Xewdy444/Playwright-reCAPTCHA", 16 | packages=find_packages(), 17 | python_requires=">=3.8", 18 | install_requires=[ 19 | "playwright>=1.33.0,!=1.50.0", 20 | "pydub==0.25.1", 21 | 'SpeechRecognition==3.14.3; python_version == "3.8"', 22 | 'SpeechRecognition==3.14.3; python_version >= "3.9"', 23 | "tenacity==9.1.2", 24 | ], 25 | classifiers=[ 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | "License :: OSI Approved :: MIT License", 33 | "Operating System :: OS Independent", 34 | "Intended Audience :: Developers", 35 | "Topic :: Software Development :: Testing", 36 | "Topic :: Internet :: WWW/HTTP :: Browsers", 37 | "Framework :: AsyncIO", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /tests/test_async_recaptchav2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from playwright.async_api import async_playwright 3 | 4 | from playwright_recaptcha import ( 5 | CapSolverError, 6 | RecaptchaNotFoundError, 7 | RecaptchaRateLimitError, 8 | recaptchav2, 9 | ) 10 | 11 | 12 | @pytest.mark.asyncio 13 | @pytest.mark.xfail(raises=RecaptchaRateLimitError) 14 | async def test_solver_with_normal_recaptcha() -> None: 15 | """Test the solver with a normal reCAPTCHA.""" 16 | async with async_playwright() as playwright: 17 | browser = await playwright.firefox.launch() 18 | page = await browser.new_page() 19 | await page.goto("https://www.google.com/recaptcha/api2/demo") 20 | 21 | async with recaptchav2.AsyncSolver(page) as solver: 22 | await solver.solve_recaptcha(wait=True) 23 | 24 | 25 | @pytest.mark.asyncio 26 | @pytest.mark.xfail(raises=(RecaptchaNotFoundError, RecaptchaRateLimitError)) 27 | async def test_solver_with_hidden_recaptcha() -> None: 28 | """Test the solver with a hidden reCAPTCHA.""" 29 | async with async_playwright() as playwright: 30 | browser = await playwright.firefox.launch() 31 | page = await browser.new_page() 32 | 33 | await page.goto("https://www.google.com/recaptcha/api2/demo?invisible=true") 34 | await page.get_by_role("button").click() 35 | 36 | async with recaptchav2.AsyncSolver(page) as solver: 37 | await solver.solve_recaptcha(wait=True) 38 | 39 | 40 | @pytest.mark.asyncio 41 | @pytest.mark.xfail(raises=RecaptchaRateLimitError) 42 | async def test_solver_with_slow_browser() -> None: 43 | """Test the solver with a slow browser.""" 44 | async with async_playwright() as playwright: 45 | browser = await playwright.firefox.launch(slow_mo=1000) 46 | page = await browser.new_page() 47 | await page.goto("https://www.google.com/recaptcha/api2/demo") 48 | 49 | async with recaptchav2.AsyncSolver(page) as solver: 50 | await solver.solve_recaptcha(wait=True) 51 | 52 | 53 | @pytest.mark.asyncio 54 | @pytest.mark.xfail(raises=CapSolverError) 55 | async def test_solver_with_image_challenge() -> None: 56 | """Test the solver with an image challenge.""" 57 | async with async_playwright() as playwright: 58 | browser = await playwright.firefox.launch() 59 | page = await browser.new_page() 60 | await page.goto("https://www.google.com/recaptcha/api2/demo") 61 | 62 | async with recaptchav2.AsyncSolver(page) as solver: 63 | await solver.solve_recaptcha(wait=True, image_challenge=True) 64 | 65 | 66 | @pytest.mark.asyncio 67 | async def test_recaptcha_not_found_error() -> None: 68 | """Test the solver with a page that does not have a reCAPTCHA.""" 69 | async with async_playwright() as playwright: 70 | browser = await playwright.firefox.launch() 71 | page = await browser.new_page() 72 | await page.goto("https://www.google.com/") 73 | 74 | with pytest.raises(RecaptchaNotFoundError): 75 | async with recaptchav2.AsyncSolver(page) as solver: 76 | await solver.solve_recaptcha() 77 | -------------------------------------------------------------------------------- /tests/test_async_recaptchav3.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from playwright.async_api import async_playwright 3 | 4 | from playwright_recaptcha import RecaptchaTimeoutError, recaptchav3 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_solver_with_normal_browser() -> None: 9 | """Test the solver with a normal browser.""" 10 | async with async_playwright() as playwright: 11 | browser = await playwright.firefox.launch() 12 | page = await browser.new_page() 13 | 14 | async with recaptchav3.AsyncSolver(page) as solver: 15 | await page.goto("https://antcpt.com/score_detector/") 16 | await solver.solve_recaptcha() 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_solver_with_slow_browser() -> None: 21 | """Test the solver with a slow browser.""" 22 | async with async_playwright() as playwright: 23 | browser = await playwright.firefox.launch(slow_mo=1000) 24 | page = await browser.new_page() 25 | 26 | async with recaptchav3.AsyncSolver(page) as solver: 27 | await page.goto("https://antcpt.com/score_detector/") 28 | await solver.solve_recaptcha() 29 | 30 | 31 | @pytest.mark.asyncio 32 | async def test_recaptcha_not_found_error() -> None: 33 | """Test the solver with a page that does not have a reCAPTCHA.""" 34 | async with async_playwright() as playwright: 35 | browser = await playwright.firefox.launch() 36 | page = await browser.new_page() 37 | 38 | with pytest.raises(RecaptchaTimeoutError): 39 | async with recaptchav3.AsyncSolver(page, timeout=10) as solver: 40 | await page.goto("https://www.google.com/") 41 | await solver.solve_recaptcha() 42 | -------------------------------------------------------------------------------- /tests/test_sync_recaptchav2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from playwright.sync_api import sync_playwright 3 | 4 | from playwright_recaptcha import ( 5 | CapSolverError, 6 | RecaptchaNotFoundError, 7 | RecaptchaRateLimitError, 8 | recaptchav2, 9 | ) 10 | 11 | 12 | @pytest.mark.xfail(raises=RecaptchaRateLimitError) 13 | def test_solver_with_normal_recaptcha() -> None: 14 | """Test the solver with a normal reCAPTCHA.""" 15 | with sync_playwright() as playwright: 16 | browser = playwright.firefox.launch() 17 | page = browser.new_page() 18 | page.goto("https://www.google.com/recaptcha/api2/demo") 19 | 20 | with recaptchav2.SyncSolver(page) as solver: 21 | solver.solve_recaptcha(wait=True) 22 | 23 | 24 | @pytest.mark.xfail(raises=(RecaptchaNotFoundError, RecaptchaRateLimitError)) 25 | def test_solver_with_hidden_recaptcha() -> None: 26 | """Test the solver with a hidden reCAPTCHA.""" 27 | with sync_playwright() as playwright: 28 | browser = playwright.firefox.launch() 29 | page = browser.new_page() 30 | 31 | page.goto("https://www.google.com/recaptcha/api2/demo?invisible=true") 32 | page.get_by_role("button").click() 33 | 34 | with recaptchav2.SyncSolver(page) as solver: 35 | solver.solve_recaptcha(wait=True) 36 | 37 | 38 | @pytest.mark.xfail(raises=RecaptchaRateLimitError) 39 | def test_solver_with_slow_browser() -> None: 40 | """Test the solver with a slow browser.""" 41 | with sync_playwright() as playwright: 42 | browser = playwright.firefox.launch(slow_mo=1000) 43 | page = browser.new_page() 44 | page.goto("https://www.google.com/recaptcha/api2/demo") 45 | 46 | with recaptchav2.SyncSolver(page) as solver: 47 | solver.solve_recaptcha(wait=True) 48 | 49 | 50 | @pytest.mark.xfail(raises=CapSolverError) 51 | def test_solver_with_image_challenge() -> None: 52 | """Test the solver with an image challenge.""" 53 | with sync_playwright() as playwright: 54 | browser = playwright.firefox.launch() 55 | page = browser.new_page() 56 | page.goto("https://www.google.com/recaptcha/api2/demo") 57 | 58 | with recaptchav2.SyncSolver(page) as solver: 59 | solver.solve_recaptcha(wait=True, image_challenge=True) 60 | 61 | 62 | def test_recaptcha_not_found_error() -> None: 63 | """Test the solver with a page that does not have a reCAPTCHA.""" 64 | with sync_playwright() as playwright: 65 | browser = playwright.firefox.launch() 66 | page = browser.new_page() 67 | page.goto("https://www.google.com/") 68 | 69 | with pytest.raises(RecaptchaNotFoundError), recaptchav2.SyncSolver( 70 | page 71 | ) as solver: 72 | solver.solve_recaptcha() 73 | -------------------------------------------------------------------------------- /tests/test_sync_recaptchav3.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from playwright.sync_api import sync_playwright 3 | 4 | from playwright_recaptcha import RecaptchaTimeoutError, recaptchav3 5 | 6 | 7 | def test_solver_with_normal_browser() -> None: 8 | """Test the solver with a normal browser.""" 9 | with sync_playwright() as playwright: 10 | browser = playwright.firefox.launch() 11 | page = browser.new_page() 12 | 13 | with recaptchav3.SyncSolver(page) as solver: 14 | page.goto("https://antcpt.com/score_detector/") 15 | solver.solve_recaptcha() 16 | 17 | 18 | def test_solver_with_slow_browser() -> None: 19 | """Test the solver with a slow browser.""" 20 | with sync_playwright() as playwright: 21 | browser = playwright.firefox.launch(slow_mo=1000) 22 | page = browser.new_page() 23 | 24 | with recaptchav3.SyncSolver(page) as solver: 25 | page.goto("https://antcpt.com/score_detector/") 26 | solver.solve_recaptcha() 27 | 28 | 29 | def test_recaptcha_not_found_error() -> None: 30 | """Test the solver with a page that does not have a reCAPTCHA.""" 31 | with sync_playwright() as playwright: 32 | browser = playwright.firefox.launch() 33 | page = browser.new_page() 34 | 35 | with pytest.raises(RecaptchaTimeoutError), recaptchav3.SyncSolver( 36 | page, timeout=10 37 | ) as solver: 38 | page.goto("https://www.google.com/") 39 | solver.solve_recaptcha() 40 | --------------------------------------------------------------------------------