├── .deepsource.toml
├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ ├── publish-package.yml
│ ├── ruff.yml
│ └── test-package.yml
├── .gitignore
├── LICENSE
├── README.md
├── examples
├── recaptchav2
│ ├── async_solve_audio.py
│ ├── async_solve_image.py
│ ├── solve_with_sitekey.py
│ ├── sync_solve_audio.py
│ └── sync_solve_image.py
└── recaptchav3
│ ├── async_solve.py
│ └── sync_solve.py
├── playwright_recaptcha
├── __init__.py
├── errors.py
├── recaptchav2
│ ├── __init__.py
│ ├── async_solver.py
│ ├── base_solver.py
│ ├── recaptcha_box.py
│ ├── sync_solver.py
│ └── translations.py
└── recaptchav3
│ ├── __init__.py
│ ├── async_solver.py
│ ├── base_solver.py
│ └── sync_solver.py
├── pytest.ini
├── requirements.txt
├── setup.py
└── tests
├── test_async_recaptchav2.py
├── test_async_recaptchav3.py
├── test_sync_recaptchav2.py
└── test_sync_recaptchav3.py
/.deepsource.toml:
--------------------------------------------------------------------------------
1 | version = 1
2 |
3 | [[analyzers]]
4 | name = "python"
5 |
6 | [analyzers.meta]
7 | runtime_version = "3.x.x"
8 |
9 | [[transformers]]
10 | name = "black"
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | updates:
4 | - package-ecosystem: "pip"
5 | directory: "/"
6 | schedule:
7 | interval: "daily"
8 | time: "03:00"
9 | timezone: "America/Chicago"
10 |
--------------------------------------------------------------------------------
/.github/workflows/publish-package.yml:
--------------------------------------------------------------------------------
1 | name: Publish Package to PyPI
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | publish-package:
12 | name: Publish Package to PyPI
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v4
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: 3.x
21 |
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install -U pip
25 | pip install build
26 |
27 | - name: Build package
28 | run: python -m build
29 |
30 | - name: Publish package to PyPI
31 | uses: pypa/gh-action-pypi-publish@release/v1
32 | with:
33 | user: __token__
34 | password: ${{ secrets.PYPI_API_TOKEN }}
35 |
--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
1 | name: Analyze With Ruff
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | permissions:
10 | actions: read
11 | contents: read
12 | security-events: write
13 |
14 | jobs:
15 | analyze-with-ruff:
16 | name: Analyze With Ruff
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v4
20 |
21 | - uses: actions/setup-python@v5
22 | with:
23 | python-version: 3.x
24 |
25 | - name: Install dependencies
26 | run: |
27 | python -m pip install -U pip
28 | pip install ruff
29 |
30 | - name: Run ruff
31 | run: ruff check
32 | --no-cache
33 | --exit-zero
34 | --output-format sarif > ruff-results.sarif
35 |
36 | - name: Upload ruff results to GitHub
37 | uses: github/codeql-action/upload-sarif@v2
38 | with:
39 | sarif_file: ruff-results.sarif
40 | wait-for-processing: true
41 |
--------------------------------------------------------------------------------
/.github/workflows/test-package.yml:
--------------------------------------------------------------------------------
1 | name: Test Package
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - playwright_recaptcha/**
9 | pull_request:
10 | paths:
11 | - playwright_recaptcha/**
12 |
13 | jobs:
14 | test-package:
15 | name: Test Package With Python ${{ matrix.python-version }}
16 | runs-on: ubuntu-latest
17 | strategy:
18 | fail-fast: false
19 | max-parallel: 1
20 | matrix:
21 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
22 | steps:
23 | - uses: actions/checkout@v4
24 |
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 |
30 | - name: Install dependencies
31 | run: |
32 | sudo apt-get update
33 | sudo apt-get install -y ffmpeg
34 | python -m pip install -U pip
35 | pip install -r requirements.txt .
36 | playwright install --with-deps firefox
37 |
38 | - name: Test with pytest
39 | run: pytest
40 | env:
41 | CAPSOLVER_API_KEY: ${{ secrets.CAPSOLVER_API_KEY }}
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ruff
2 | .ruff_cache/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | cover/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | .pybuilder/
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # poetry
101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | # This is especially recommended for binary packages to ensure reproducibility, and is more
103 | # commonly ignored for libraries.
104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 |
107 | # pdm
108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | # in version control.
112 | # https://pdm.fming.dev/#use-with-ide
113 | .pdm.toml
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | # pytype static type analyzer
153 | .pytype/
154 |
155 | # Cython debug symbols
156 | cython_debug/
157 |
158 | # PyCharm
159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161 | # and can be added to the global gitignore or merged into this file. For a more nuclear
162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163 | #.idea/
164 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Xewdy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://www.python.org/downloads/)
2 | [](https://pypi.org/project/playwright-recaptcha/)
3 | [](https://pypi.org/project/playwright-recaptcha/)
4 | [](https://github.com/Xewdy444/Playwright-reCAPTCHA/blob/main/LICENSE)
5 |
6 | ---
7 |
8 |
9 |
10 |
11 |
12 |
13 |
13 | Capsolver.com is an AI-powered service that specializes in solving various types of captchas automatically. It supports captchas such as reCAPTCHA V2, reCAPTCHA V3, hCaptcha, FunCaptcha, DataDome, AWS Captcha, Geetest, Cloudflare Captcha / Challenge 5s, and Imperva / Incapsula, among others.
14 | For developers, Capsolver offers API integration options detailed in their documentation, facilitating the integration of captcha solving into applications. They also provide browser extensions for Chrome and Firefox, making it easy to use their service directly within a browser. Different pricing packages are available to accommodate varying needs, ensuring flexibility for users.
15 |
16 |
17 | ---
18 |
19 | # Playwright-reCAPTCHA
20 | A Python library for solving reCAPTCHA v2 and v3 with Playwright.
21 |
22 | ## Solving reCAPTCHA v2
23 | reCAPTCHA v2 is solved by using the following methods:
24 |
25 | - Solving the audio challenge by transcribing the audio using the Google speech recognition API and entering the text as the response.
26 | - Solving the image challenge using the [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API for image classification.
27 |
28 | ## Solving reCAPTCHA v3
29 | The solving of reCAPTCHA v3 is done by the browser itself, so this library simply waits for the browser to make a POST request to https://www.google.com/recaptcha/api2/reload or https://www.google.com/recaptcha/enterprise/reload and parses the response to get the `g-recaptcha-response` token.
30 |
31 | ---
32 |
33 | All solvers return the `g-recaptcha-response` token, which is required for form submissions. If you are unsure about the version of reCAPTCHA being used, you can check out [this blog post](https://www.capsolver.com/blog/reCAPTCHA/identify-what-recaptcha-version-is-being-used) for more information.
34 |
35 | ## Installation
36 | pip install playwright-recaptcha
37 |
38 | This library requires FFmpeg to be installed on your system for the transcription of reCAPTCHA v2 audio challenges.
39 |
40 | | OS | Command |
41 | | :-----: | :--------------------: |
42 | | Debian | apt-get install ffmpeg |
43 | | macOS | brew install ffmpeg |
44 | | Windows | winget install ffmpeg |
45 |
46 | You can also download the latest static build from [here](https://ffmpeg.org/download.html).
47 |
48 | > **Note**
49 | > Make sure to have the ffmpeg and ffprobe binaries in your system's PATH so that pydub can find them.
50 |
51 | ## Supported Languages
52 | - Chinese (zh-CN)
53 | - Dutch (nl)
54 | - English (en)
55 | - French (fr)
56 | - German (de)
57 | - Italian (it)
58 | - Portuguese (pt)
59 | - Russian (ru)
60 | - Spanish (es)
61 |
62 | If you would like to request support for a new language, please open an issue. You can also open a pull request if you would like to contribute.
63 |
64 | ## reCAPTCHA v2 Example
65 | For more reCAPTCHA v2 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav2).
66 |
67 | ```python
68 | from playwright.sync_api import sync_playwright
69 | from playwright_recaptcha import recaptchav2
70 |
71 | with sync_playwright() as playwright:
72 | browser = playwright.firefox.launch()
73 | page = browser.new_page()
74 | page.goto("https://www.google.com/recaptcha/api2/demo")
75 |
76 | with recaptchav2.SyncSolver(page) as solver:
77 | token = solver.solve_recaptcha(wait=True)
78 | print(token)
79 | ```
80 |
81 | By default, the audio challenge will be solved. If you would like to solve the image challenge, you can set the `CAPSOLVER_API_KEY` environment variable to your [CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=banner_github&utm_campaign=Playwright-reCAPTCHA) API key. You can also pass the API key as an argument to `recaptchav2.SyncSolver()` with `capsolver_api_key="your_api_key"`. Then, set `image_challenge=True` in `solver.solve_recaptcha()`.
82 |
83 | ```python
84 | with recaptchav2.SyncSolver(page, capsolver_api_key="your_api_key") as solver:
85 | token = solver.solve_recaptcha(wait=True, image_challenge=True)
86 | print(token)
87 | ```
88 |
89 | ## reCAPTCHA v3 Example
90 | For more reCAPTCHA v3 examples, see the [examples folder](https://github.com/Xewdy444/Playwright-reCAPTCHA/tree/main/examples/recaptchav3).
91 |
92 | ```python
93 | from playwright.sync_api import sync_playwright
94 | from playwright_recaptcha import recaptchav3
95 |
96 | with sync_playwright() as playwright:
97 | browser = playwright.firefox.launch()
98 | page = browser.new_page()
99 |
100 | with recaptchav3.SyncSolver(page) as solver:
101 | page.goto("https://antcpt.com/score_detector/")
102 | token = solver.solve_recaptcha()
103 | print(token)
104 | ```
105 |
106 | It is best to initialize the solver before navigating to the page with the reCAPTCHA v3 challenge. This is because the solver adds a listener for the POST request to https://www.google.com/recaptcha/api2/reload or https://www.google.com/recaptcha/enterprise/reload and if the request is made before the listener is added, the `g-recaptcha-response` token will not be captured.
107 |
108 |
109 | ## Disclaimer
110 | This library is intended for use in automated testing and development environments only and should not be used for any illegal or malicious purposes. Any use of this library for activities that violate the terms of service of any website or service is strictly prohibited. The contributors of this library will not be held liable for any damages or legal issues that may arise from the use of this library. By using this library, you agree to these terms and take full responsibility for your actions.
111 |
--------------------------------------------------------------------------------
/examples/recaptchav2/async_solve_audio.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from playwright.async_api import async_playwright
4 |
5 | from playwright_recaptcha import recaptchav2
6 |
7 |
async def main() -> None:
    """Open the reCAPTCHA v2 demo page in Firefox and print the solved token."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        demo_page = await firefox.new_page()
        await demo_page.goto(demo_url)

        # No `image_challenge` argument is passed, so the solver's default
        # challenge type is used.
        async with recaptchav2.AsyncSolver(demo_page) as solver:
            print(await solver.solve_recaptcha(wait=True))


if __name__ == "__main__":
    asyncio.run(main())
21 |
--------------------------------------------------------------------------------
/examples/recaptchav2/async_solve_image.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from playwright.async_api import async_playwright
4 |
5 | from playwright_recaptcha import recaptchav2
6 |
7 |
async def main() -> None:
    """Open the reCAPTCHA v2 demo page in Firefox and solve the image challenge."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        demo_page = await firefox.new_page()
        await demo_page.goto(demo_url)

        # No API key is passed to the solver here.
        # NOTE(review): the image challenge presumably needs a CapSolver API
        # key supplied some other way - confirm against the solver.
        async with recaptchav2.AsyncSolver(demo_page) as solver:
            print(await solver.solve_recaptcha(wait=True, image_challenge=True))


if __name__ == "__main__":
    asyncio.run(main())
21 |
--------------------------------------------------------------------------------
/examples/recaptchav2/solve_with_sitekey.py:
--------------------------------------------------------------------------------
1 | from playwright.sync_api import sync_playwright
2 |
3 | from playwright_recaptcha import recaptchav2
4 |
5 | RECAPTCHA_HTML = """
6 |
7 |
8 |
9 |
11 |
12 |
13 |
16 |
17 |
18 | """
19 |
20 |
def main() -> None:
    """Render a reCAPTCHA v2 widget for a given site key and print the solved token."""
    sitekey = "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        widget_page = firefox.new_page()

        # It is important to load a website before setting the reCAPTCHA HTML.
        # If you don't, the reCAPTCHA will give you an
        # "Invalid domain for site key" error.
        widget_page.goto("https://www.google.com/", wait_until="commit")

        # Replace the page content with the template, filling in the site key.
        widget_page.set_content(RECAPTCHA_HTML.format(sitekey=sitekey))

        with recaptchav2.SyncSolver(widget_page) as solver:
            print(solver.solve_recaptcha(wait=True))


if __name__ == "__main__":
    main()
41 |
--------------------------------------------------------------------------------
/examples/recaptchav2/sync_solve_audio.py:
--------------------------------------------------------------------------------
1 | from playwright.sync_api import sync_playwright
2 |
3 | from playwright_recaptcha import recaptchav2
4 |
5 |
def main() -> None:
    """Open the reCAPTCHA v2 demo page in Firefox and print the solved token."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        demo_page = firefox.new_page()
        demo_page.goto(demo_url)

        # No `image_challenge` argument is passed, so the solver's default
        # challenge type is used.
        with recaptchav2.SyncSolver(demo_page) as solver:
            print(solver.solve_recaptcha(wait=True))


if __name__ == "__main__":
    main()
19 |
--------------------------------------------------------------------------------
/examples/recaptchav2/sync_solve_image.py:
--------------------------------------------------------------------------------
1 | from playwright.sync_api import sync_playwright
2 |
3 | from playwright_recaptcha import recaptchav2
4 |
5 |
def main() -> None:
    """Open the reCAPTCHA v2 demo page in Firefox and solve the image challenge."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        demo_page = firefox.new_page()
        demo_page.goto(demo_url)

        # No API key is passed to the solver here.
        # NOTE(review): the image challenge presumably needs a CapSolver API
        # key supplied some other way - confirm against the solver.
        with recaptchav2.SyncSolver(demo_page) as solver:
            print(solver.solve_recaptcha(wait=True, image_challenge=True))


if __name__ == "__main__":
    main()
19 |
--------------------------------------------------------------------------------
/examples/recaptchav3/async_solve.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 |
4 | from playwright.async_api import async_playwright
5 |
6 | from playwright_recaptcha import recaptchav3
7 |
8 |
async def main() -> None:
    """Solve reCAPTCHA v3 on the score detector page; print the token and score."""
    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        detector_page = await firefox.new_page()

        # The solver is created before navigating to the page.
        # NOTE(review): presumably so it can observe the reCAPTCHA request
        # made during page load - confirm against the v3 solver.
        async with recaptchav3.AsyncSolver(detector_page) as solver:
            await detector_page.goto("https://antcpt.com/score_detector/")
            print(await solver.solve_recaptcha())

        # Locate the element that reports the score and print its text.
        score_text = detector_page.get_by_text(
            re.compile(r"Your score is: (\d\.\d)")
        )
        print(await score_text.inner_text())


if __name__ == "__main__":
    asyncio.run(main())
26 |
--------------------------------------------------------------------------------
/examples/recaptchav3/sync_solve.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from playwright.sync_api import sync_playwright
4 |
5 | from playwright_recaptcha import recaptchav3
6 |
7 |
def main() -> None:
    """Solve reCAPTCHA v3 on the score detector page; print the token and score."""
    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        detector_page = firefox.new_page()

        # The solver is created before navigating to the page.
        # NOTE(review): presumably so it can observe the reCAPTCHA request
        # made during page load - confirm against the v3 solver.
        with recaptchav3.SyncSolver(detector_page) as solver:
            detector_page.goto("https://antcpt.com/score_detector/")
            print(solver.solve_recaptcha())

        # Locate the element that reports the score and print its text.
        score_text = detector_page.get_by_text(
            re.compile(r"Your score is: (\d\.\d)")
        )
        print(score_text.inner_text())


if __name__ == "__main__":
    main()
25 |
--------------------------------------------------------------------------------
/playwright_recaptcha/__init__.py:
--------------------------------------------------------------------------------
1 | """A library for solving reCAPTCHA v2 and v3 with Playwright."""
2 |
3 | __author__ = "Xewdy444"
4 | __version__ = "0.5.1"
5 | __license__ = "MIT"
6 |
7 | from .errors import (
8 | CapSolverError,
9 | RecaptchaError,
10 | RecaptchaNotFoundError,
11 | RecaptchaRateLimitError,
12 | RecaptchaSolveError,
13 | RecaptchaTimeoutError,
14 | )
15 | from .recaptchav2 import AsyncSolver as AsyncSolverV2
16 | from .recaptchav2 import SyncSolver as SyncSolverV2
17 | from .recaptchav3 import AsyncSolver as AsyncSolverV3
18 | from .recaptchav3 import SyncSolver as SyncSolverV3
19 |
20 | __all__ = [
21 | "CapSolverError",
22 | "RecaptchaError",
23 | "RecaptchaNotFoundError",
24 | "RecaptchaRateLimitError",
25 | "RecaptchaSolveError",
26 | "RecaptchaTimeoutError",
27 | "AsyncSolverV2",
28 | "SyncSolverV2",
29 | "AsyncSolverV3",
30 | "SyncSolverV3",
31 | ]
32 |
--------------------------------------------------------------------------------
/playwright_recaptcha/errors.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 |
class CapSolverError(Exception):
    """
    Raised when the CapSolver API reports an error.

    Parameters
    ----------
    message : Optional[str], optional
        The error message, by default None.
        A generic message is used when this is None or empty.
    """

    def __init__(self, message: Optional[str] = None) -> None:
        fallback = "The CapSolver API returned an error."
        super().__init__(message if message else fallback)
9 |
10 |
class RecaptchaError(Exception):
    """Base class for reCAPTCHA exceptions."""


class RecaptchaNotFoundError(RecaptchaError):
    """
    An exception raised when the reCAPTCHA was not found.

    Parameters
    ----------
    message : Optional[str], optional
        The error message, by default None.
        A generic message is used when this is None or empty.
    """

    def __init__(self, message: Optional[str] = None) -> None:
        super().__init__(message or "The reCAPTCHA was not found.")


class RecaptchaSolveError(RecaptchaError):
    """
    Base class for reCAPTCHA solve exceptions.

    Parameters
    ----------
    message : Optional[str], optional
        The error message, by default None.
        A generic message is used when this is None or empty.
    """

    def __init__(self, message: Optional[str] = None) -> None:
        super().__init__(message or "The reCAPTCHA could not be solved.")


class RecaptchaRateLimitError(RecaptchaSolveError):
    """
    An exception raised when the reCAPTCHA rate limit has been exceeded.

    Parameters
    ----------
    message : Optional[str], optional
        The error message, by default None.
        A generic message is used when this is None or empty.
    """

    # The optional `message` parameter matches the sibling exceptions and lets
    # the exception be rebuilt from its `args` (as `copy` and `pickle` do).
    # Without it, `RecaptchaRateLimitError(*err.args)` raises a TypeError.
    def __init__(self, message: Optional[str] = None) -> None:
        super().__init__(message or "The reCAPTCHA rate limit has been exceeded.")


class RecaptchaTimeoutError(RecaptchaSolveError):
    """
    An exception raised when the reCAPTCHA solve timeout has been exceeded.

    Parameters
    ----------
    message : Optional[str], optional
        The error message, by default None.
        A generic message is used when this is None or empty.
    """

    # See RecaptchaRateLimitError: the parameter keeps the exception
    # reconstructible from `args` and consistent with its siblings.
    def __init__(self, message: Optional[str] = None) -> None:
        super().__init__(message or "The reCAPTCHA solve timeout has been exceeded.")
41 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/__init__.py:
--------------------------------------------------------------------------------
1 | """reCAPTCHA v2 solver for Playwright."""
2 | from .async_solver import AsyncSolver
3 | from .sync_solver import SyncSolver
4 |
5 | __all__ = ["AsyncSolver", "SyncSolver"]
6 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/async_solver.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import asyncio
4 | import base64
5 | import functools
6 | import re
7 | import time
8 | from concurrent.futures import ThreadPoolExecutor
9 | from datetime import datetime
10 | from io import BytesIO
11 | from json import JSONDecodeError
12 | from typing import Any, BinaryIO, Dict, List, Optional, Union
13 | from urllib.parse import parse_qs, urlparse
14 |
15 | import speech_recognition
16 | from playwright.async_api import Locator, Page, Response
17 | from pydub import AudioSegment
18 | from pydub.exceptions import CouldntDecodeError
19 | from tenacity import (
20 | AsyncRetrying,
21 | retry_if_exception_type,
22 | stop_after_delay,
23 | wait_fixed,
24 | )
25 |
26 | from ..errors import (
27 | CapSolverError,
28 | RecaptchaNotFoundError,
29 | RecaptchaRateLimitError,
30 | RecaptchaSolveError,
31 | )
32 | from .base_solver import BaseSolver
33 | from .recaptcha_box import AsyncRecaptchaBox
34 | from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO
35 |
36 |
class AsyncAudioFile(speech_recognition.AudioFile):
    """
    A subclass of `speech_recognition.AudioFile` that can be used asynchronously.

    The inherited `__enter__` and `__exit__` are run in an executor so they do
    not run on the event loop thread.

    Parameters
    ----------
    file : Union[BinaryIO, str]
        The audio file handle or file path.
    executor : Optional[ThreadPoolExecutor], optional
        The thread pool executor to use, by default None.
        None is passed through to `loop.run_in_executor` unchanged.
    """

    def __init__(
        self,
        file: Union[BinaryIO, str],
        *,
        executor: Optional[ThreadPoolExecutor] = None,
    ) -> None:
        super().__init__(file)
        self._executor = executor

    async def __aenter__(self) -> AsyncAudioFile:
        # Resolve the loop at the point of use. Calling
        # `asyncio.get_event_loop()` from the synchronous constructor is
        # deprecated when no loop is running, and it could bind this object
        # to a loop other than the one awaiting it.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(self._executor, self.__enter__)
        return self

    async def __aexit__(self, *args: Any) -> None:
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(self._executor, self.__exit__, *args)
65 |
66 |
67 | class AsyncSolver(BaseSolver[Page]):
68 | """
69 | A class for solving reCAPTCHA v2 asynchronously with Playwright.
70 |
71 | Parameters
72 | ----------
73 | page : Page
74 | The Playwright page to solve the reCAPTCHA on.
75 | attempts : int, optional
76 | The number of solve attempts, by default 5.
77 | capsolver_api_key : Optional[str], optional
78 | The CapSolver API key, by default None.
79 | If None, the `CAPSOLVER_API_KEY` environment variable will be used.
80 | """
81 |
async def __aenter__(self) -> AsyncSolver:
    """
    Enter the asynchronous context.

    Returns
    -------
    AsyncSolver
        This solver instance.
    """
    solver = self
    return solver
84 |
async def __aexit__(self, *_: Any) -> None:
    """
    Leave the asynchronous context by calling `close()` on this solver.

    The exception details supplied by the `async with` statement are ignored.
    """
    self.close()
87 |
@staticmethod
async def _get_task_object(recaptcha_box: AsyncRecaptchaBox) -> Optional[str]:
    """
    Resolve the object named in the reCAPTCHA image challenge task to its ID.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Returns
    -------
    Optional[str]
        The object ID. Returns None if the task object is not recognized.
    """
    # (object ID, key into OBJECT_TRANSLATIONS) pairs, in lookup order.
    id_key_pairs = (
        ("/m/0pg52", "taxis"),
        ("/m/01bjv", "bus"),
        ("/m/04_sv", "motorcycles"),
        ("/m/013xlm", "tractors"),
        ("/m/01jk_4", "chimneys"),
        ("/m/014xcs", "crosswalks"),
        ("/m/015qff", "traffic_lights"),
        ("/m/0199g", "bicycles"),
        ("/m/015qbp", "parking_meters"),
        ("/m/0k4j", "cars"),
        ("/m/015kr", "bridges"),
        ("/m/019jd", "boats"),
        ("/m/0cdl1", "palm_trees"),
        ("/m/09d_r", "mountains_or_hills"),
        ("/m/01pns0", "fire_hydrant"),
        ("/m/01lynh", "stairs"),
    )

    # Built eagerly, before any awaiting, so a missing translation key fails
    # up front.
    translations_by_id = {
        object_id: OBJECT_TRANSLATIONS[key] for object_id, key in id_key_pairs
    }

    div_texts = await recaptcha_box.bframe_frame.locator("div").all_inner_texts()

    # The object name is taken from the second line of the first div's text.
    task_lines = div_texts[0].split("\n")
    task_object = task_lines[1]

    return next(
        (
            object_id
            for object_id, names in translations_by_id.items()
            if task_object in names
        ),
        None,
    )
130 |
async def _response_callback(self, response: Response) -> None:
    """
    Intercept payload and userverify responses.

    The first payload response seen is stored in `_payload_response`. For a
    userverify response, the token is extracted from the body and stored in
    `_token` when present.

    Parameters
    ----------
    response : Response
        The response.
    """
    url = response.url
    is_payload = re.search("/recaptcha/(api2|enterprise)/payload", url) is not None

    # Only the first payload response is kept; later ones fall through.
    if is_payload and self._payload_response is None:
        self._payload_response = response
        return

    if re.search("/recaptcha/(api2|enterprise)/userverify", url) is None:
        return

    body = await response.text()
    found = re.search('"uvresp","(.*?)"', body)

    if found is None:
        return

    self._token = found.group(1)
153 |
async def _get_capsolver_response(
    self, recaptcha_box: AsyncRecaptchaBox, image_data: bytes
) -> Optional[Dict[str, Any]]:
    """
    Ask the CapSolver API to classify an image for the current task object.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.
    image_data : bytes
        The image data.

    Returns
    -------
    Optional[Dict[str, Any]]
        The CapSolver JSON response.
        Returns None if the task object is not recognized.

    Raises
    ------
    CapSolverError
        If the CapSolver API returned an error.
    """
    encoded_image = base64.b64encode(image_data).decode("utf-8")
    question = await self._get_task_object(recaptcha_box)

    # Without a recognized task object there is nothing to ask CapSolver.
    if question is None:
        return None

    task = {
        "type": "ReCaptchaV2Classification",
        "image": encoded_image,
        "question": question,
    }

    api_response = await self._page.request.post(
        "https://api.capsolver.com/createTask",
        data={"clientKey": self._capsolver_api_key, "task": task},
    )

    try:
        result = await api_response.json()
    except JSONDecodeError as err:
        # A body that is not valid JSON is reported as a CapSolver failure.
        raise CapSolverError from err

    if result["errorId"] == 0:
        return result

    raise CapSolverError(result["errorDescription"])
206 |
async def _solve_tiles(
    self, recaptcha_box: AsyncRecaptchaBox, indexes: List[int]
) -> None:
    """
    Solve the tiles in the reCAPTCHA image challenge.

    Each tile in `indexes` is clicked. Tiles that carry the
    `rc-imageselect-dynamic-selected` class after the click are tracked.
    Whenever a tracked tile's image URL changes, the new image is sent to
    CapSolver and the tile is clicked again if it contains the task object.
    Tracking stops when no tracked tiles remain or after 60 seconds.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.
    indexes : List[int]
        The indexes of the tiles that contain the task object.

    Raises
    ------
    CapSolverError
        If the CapSolver API returned an error.
    """
    # Maps each tracked tile to the last image URL seen for it.
    changing_tiles: Dict[Locator, str] = {}

    # Clears the tile's inline style and resets its class name.
    style_script = """
    (element) => {
        element.style = "";
        element.className = "rc-imageselect-tile";
    }
    """

    for index in indexes:
        tile = recaptcha_box.tile_selector.nth(index)
        await tile.click()

        # `get_attribute` returns None when the attribute is absent; treat
        # that as "not a dynamic tile" instead of failing the `in` test.
        tile_class = await tile.get_attribute("class") or ""

        if "rc-imageselect-dynamic-selected" not in tile_class:
            continue

        changing_tiles[tile] = await tile.locator("img").get_attribute("src")
        await tile.evaluate(style_script)

    # A monotonic clock keeps the 60 second budget unaffected by system
    # clock adjustments.
    deadline = time.monotonic() + 60

    while changing_tiles and time.monotonic() < deadline:
        # Iterate over a snapshot because entries are removed in the loop.
        for tile in list(changing_tiles):
            image_url = await tile.locator("img").get_attribute("src")

            # The image has not changed yet; check again on the next pass.
            if changing_tiles[tile] == image_url:
                continue

            changing_tiles[tile] = image_url
            response = await self._page.request.get(image_url)

            capsolver_response = await self._get_capsolver_response(
                recaptcha_box, await response.body()
            )

            # Stop tracking the tile once its new image is unrecognized or
            # does not contain the task object.
            if (
                capsolver_response is None
                or not capsolver_response["solution"]["hasObject"]
            ):
                changing_tiles.pop(tile)
                continue

            await tile.click()
            await tile.evaluate(style_script)
272 |
async def _transcribe_audio(
    self, audio_url: str, *, language: str = "en-US"
) -> Optional[str]:
    """
    Transcribe the reCAPTCHA audio challenge.

    The MP3 is downloaded through the page's request context, converted to
    WAV and sent to the Google speech recognizer. The blocking conversion
    and recognition calls run in the default executor.

    Parameters
    ----------
    audio_url : str
        The reCAPTCHA audio URL.
    language : str, optional
        The language of the audio, by default en-US.

    Returns
    -------
    Optional[str]
        The reCAPTCHA audio text.
        Returns None if the audio could not be decoded
        or if the recognizer could not understand it.
    """
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the documented way to obtain it.
    loop = asyncio.get_running_loop()
    response = await self._page.request.get(audio_url)

    wav_audio = BytesIO()
    mp3_audio = BytesIO(await response.body())

    try:
        audio: AudioSegment = await loop.run_in_executor(
            None, AudioSegment.from_mp3, mp3_audio
        )
    except CouldntDecodeError:
        return None

    # run_in_executor() does not forward keyword arguments, hence the partial.
    await loop.run_in_executor(
        None, functools.partial(audio.export, wav_audio, format="wav")
    )

    recognizer = speech_recognition.Recognizer()

    async with AsyncAudioFile(wav_audio) as source:
        audio_data = await loop.run_in_executor(None, recognizer.record, source)

    try:
        return await loop.run_in_executor(
            None,
            functools.partial(
                recognizer.recognize_google, audio_data, language=language
            ),
        )
    except speech_recognition.UnknownValueError:
        return None
323 |
async def _click_checkbox(self, recaptcha_box: AsyncRecaptchaBox) -> None:
    """
    Click the reCAPTCHA checkbox and wait for the outcome.

    Polling stops as soon as a challenge becomes visible, a token has been
    captured, or the reCAPTCHA frames are no longer attached.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Raises
    ------
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    await recaptcha_box.checkbox.click()

    while True:
        # Nothing left to wait for once the frames are gone or a token arrived.
        if not recaptcha_box.frames_are_attached() or self._token is not None:
            break

        if await recaptcha_box.rate_limit_is_visible():
            raise RecaptchaRateLimitError

        if await recaptcha_box.any_challenge_is_visible():
            break

        await self._page.wait_for_timeout(250)
348 |
async def _get_audio_url(self, recaptcha_box: AsyncRecaptchaBox) -> str:
    """
    Wait for the audio challenge and return its download URL.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Returns
    -------
    str
        The `href` of the audio download link.

    Raises
    ------
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    while True:
        rate_limited = await recaptcha_box.rate_limit_is_visible()

        if rate_limited:
            raise RecaptchaRateLimitError

        # Keep polling every 250 ms until the audio challenge shows up.
        if not await recaptcha_box.audio_challenge_is_visible():
            await self._page.wait_for_timeout(250)
            continue

        download_link = recaptcha_box.audio_download_button
        return await download_link.get_attribute("href")
376 |
async def _submit_audio_text(
    self, recaptcha_box: AsyncRecaptchaBox, text: str
) -> None:
    """
    Enter the transcribed text and submit it for verification.

    After the `userverify` response has arrived, the box is polled until the
    audio challenge disappears, a solve failure is shown, the challenge is
    solved, or the frames are detached.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.
    text : str
        The reCAPTCHA audio text.

    Raises
    ------
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    await recaptcha_box.audio_challenge_textbox.fill(text)

    verify_pattern = re.compile("/recaptcha/(api2|enterprise)/userverify")

    async with self._page.expect_response(verify_pattern) as verify_info:
        await recaptcha_box.verify_button.click()

    await verify_info.value

    while recaptcha_box.frames_are_attached():
        if await recaptcha_box.rate_limit_is_visible():
            raise RecaptchaRateLimitError

        # Any of the following states means the submission has been processed.
        if not await recaptcha_box.audio_challenge_is_visible():
            return

        if await recaptcha_box.solve_failure_is_visible():
            return

        if await recaptcha_box.challenge_is_solved():
            return

        await self._page.wait_for_timeout(250)
416 |
async def _submit_tile_answers(self, recaptcha_box: AsyncRecaptchaBox) -> None:
    """
    Click verify and wait for the image challenge result.

    Returns once the challenge is solved or a try-again message is shown.
    If a "check new images" or "select all matching" message appears
    instead, a new challenge is requested and its payload response awaited
    before returning.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Raises
    ------
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    await recaptcha_box.verify_button.click()

    while recaptcha_box.frames_are_attached():
        if await recaptcha_box.rate_limit_is_visible():
            raise RecaptchaRateLimitError

        if await recaptcha_box.challenge_is_solved():
            return

        if await recaptcha_box.try_again_is_visible():
            return

        needs_new_challenge = (
            await recaptcha_box.check_new_images_is_visible()
            or await recaptcha_box.select_all_matching_is_visible()
        )

        if not needs_new_challenge:
            await self._page.wait_for_timeout(250)
            continue

        payload_pattern = re.compile("/recaptcha/(api2|enterprise)/payload")

        async with self._page.expect_response(payload_pattern) as payload_info:
            await recaptcha_box.new_challenge_button.click()

        await payload_info.value
        return
456 |
async def _solve_image_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None:
    """
    Solve the reCAPTCHA image challenge.

    Repeatedly classifies the image held in `self._payload_response` with
    CapSolver, clicks the matching tiles and advances through the challenge
    until the answers have been submitted or the frames are detached.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Raises
    ------
    CapSolverError
        If the CapSolver API returned an error.
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    while recaptcha_box.frames_are_attached():
        # Classify the most recently captured challenge image.
        capsolver_response = await self._get_capsolver_response(
            recaptcha_box, await self._payload_response.body()
        )

        # Unrecognized task object or no matching tiles: request a new challenge.
        if (
            capsolver_response is None
            or not capsolver_response["solution"]["objects"]
        ):
            # Clear the stale image before requesting a new challenge so the
            # wait loop below only ends once a fresh payload has been stored.
            self._payload_response = None

            async with self._page.expect_response(
                re.compile("/recaptcha/(api2|enterprise)/reload")
            ) as response:
                await recaptcha_box.new_challenge_button.click()

            await response.value

            # `_payload_response` is not assigned in this method; it is
            # presumably repopulated by the response callback - confirm.
            while self._payload_response is None:
                if await recaptcha_box.rate_limit_is_visible():
                    raise RecaptchaRateLimitError

                await self._page.wait_for_timeout(250)

            continue

        await self._solve_tiles(
            recaptcha_box, capsolver_response["solution"]["objects"]
        )

        # Reset before the next click so a newly captured payload can be stored.
        self._payload_response = None
        button = recaptcha_box.skip_button.or_(recaptcha_box.next_button)

        # Neither a skip nor a next button is visible: submit the answers.
        if await button.is_hidden():
            await self._submit_tile_answers(recaptcha_box)
            return

        # Otherwise advance and wait for the next payload response.
        async with self._page.expect_response(
            re.compile("/recaptcha/(api2|enterprise)/payload")
        ):
            await button.click()
514 |
async def _solve_audio_challenge(self, recaptcha_box: AsyncRecaptchaBox) -> None:
    """
    Solve the reCAPTCHA audio challenge.

    The audio language is taken from the `hl` query parameter of the anchor
    frame URL. New audio challenges are requested until one can be
    transcribed, and the transcription is then submitted.

    Parameters
    ----------
    recaptcha_box : AsyncRecaptchaBox
        The reCAPTCHA box.

    Raises
    ------
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    """
    parsed_url = urlparse(recaptcha_box.anchor_frame.url)
    query_params = parse_qs(parsed_url.query)

    # parse_qs() leaves out parameters that are missing or blank, so fall
    # back to en-US instead of raising KeyError when `hl` is unavailable.
    language = query_params.get("hl", ["en-US"])[0]

    # Languages outside ORIGINAL_LANGUAGE_AUDIO are transcribed as en-US.
    if language not in ORIGINAL_LANGUAGE_AUDIO:
        language = "en-US"

    while True:
        url = await self._get_audio_url(recaptcha_box)
        text = await self._transcribe_audio(url, language=language)

        if text is not None:
            break

        # The audio could not be transcribed: request a different challenge.
        async with self._page.expect_response(
            re.compile("/recaptcha/(api2|enterprise)/reload")
        ) as response:
            await recaptcha_box.new_challenge_button.click()

        await response.value

        # Wait until the download link points at the new audio file.
        while url == await self._get_audio_url(recaptcha_box):
            await self._page.wait_for_timeout(250)

    await self._submit_audio_text(recaptcha_box, text)
554 |
async def recaptcha_is_visible(self) -> bool:
    """
    Tell whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.

    Returns
    -------
    bool
        True if a reCAPTCHA box could be created from the page's frames,
        False if that raised `RecaptchaNotFoundError`.
    """
    try:
        await AsyncRecaptchaBox.from_frames(self._page.frames)
    except RecaptchaNotFoundError:
        found = False
    else:
        found = True

    return found
570 |
async def solve_recaptcha(
    self,
    *,
    attempts: Optional[int] = None,
    wait: bool = False,
    wait_timeout: float = 30,
    image_challenge: bool = False,
) -> str:
    """
    Solve the reCAPTCHA and return the `g-recaptcha-response` token.

    Parameters
    ----------
    attempts : Optional[int], optional
        The number of solve attempts, by default None.
        If None (or 0), the number of attempts configured on the solver is used.
    wait : bool, optional
        Whether to wait for the reCAPTCHA to appear, by default False.
    wait_timeout : float, optional
        The amount of time in seconds to wait for the reCAPTCHA to appear,
        by default 30. Only used if `wait` is True.
    image_challenge : bool, optional
        Whether to solve the image challenge, by default False.

    Returns
    -------
    str
        The `g-recaptcha-response` token.

    Raises
    ------
    CapSolverError
        If the CapSolver API returned an error
        or if `image_challenge` is True and no CapSolver API key is set.
    RecaptchaNotFoundError
        If the reCAPTCHA was not found.
    RecaptchaRateLimitError
        If the reCAPTCHA rate limit has been exceeded.
    RecaptchaSolveError
        If the reCAPTCHA could not be solved.
    """
    if image_challenge and self._capsolver_api_key is None:
        raise CapSolverError(
            "You must provide a CapSolver API key to solve image challenges."
        )

    self._token = None
    attempts = attempts or self._attempts

    if wait:

        async def sleep_seconds(seconds: float) -> None:
            # tenacity passes the delay in seconds, whereas
            # wait_for_timeout() expects milliseconds.
            await self._page.wait_for_timeout(seconds * 1000)

        # Look for the reCAPTCHA every 250 ms until `wait_timeout` has elapsed.
        retry = AsyncRetrying(
            sleep=sleep_seconds,
            stop=stop_after_delay(wait_timeout),
            wait=wait_fixed(0.25),
            retry=retry_if_exception_type(RecaptchaNotFoundError),
            reraise=True,
        )

        recaptcha_box = await retry(
            lambda: AsyncRecaptchaBox.from_frames(self._page.frames)
        )
    else:
        recaptcha_box = await AsyncRecaptchaBox.from_frames(self._page.frames)

    if await recaptcha_box.rate_limit_is_visible():
        raise RecaptchaRateLimitError

    if await recaptcha_box.checkbox.is_visible():
        click_timestamp = time.time()
        await self._click_checkbox(recaptcha_box)

        # The click alone may already have produced a token.
        if self._token is not None:
            return self._token

        # No challenge to solve: wait until the token has been captured.
        if (
            recaptcha_box.frames_are_detached()
            or not await recaptcha_box.any_challenge_is_visible()
            or await recaptcha_box.challenge_is_solved()
        ):
            while self._token is None:
                await self._page.wait_for_timeout(250)

            return self._token

        # Ensure at least one second passes between the click and the next step.
        time_to_wait = max(1 - (time.time() - click_timestamp), 0)
        await self._page.wait_for_timeout(time_to_wait * 1000)

    while not await recaptcha_box.any_challenge_is_visible():
        await self._page.wait_for_timeout(250)

    # Switch to the requested challenge type if the other one is displayed.
    if image_challenge and await recaptcha_box.image_challenge_button.is_visible():
        await recaptcha_box.image_challenge_button.click()
    elif (
        not image_challenge
        and await recaptcha_box.audio_challenge_button.is_visible()
    ):
        await recaptcha_box.audio_challenge_button.click()

    if image_challenge:
        # Fetch the currently displayed challenge image as the first payload.
        image = recaptcha_box.image_challenge.locator("img").first
        image_url = await image.get_attribute("src")
        self._payload_response = await self._page.request.get(image_url)

    while attempts > 0:
        self._token = None

        if image_challenge:
            await self._solve_image_challenge(recaptcha_box)
        else:
            await self._solve_audio_challenge(recaptcha_box)

        # The challenge is gone or solved: wait for the token and return it.
        if (
            recaptcha_box.frames_are_detached()
            or not await recaptcha_box.any_challenge_is_visible()
            or await recaptcha_box.challenge_is_solved()
        ):
            while self._token is None:
                await self._page.wait_for_timeout(250)

            return self._token

        attempts -= 1

    raise RecaptchaSolveError
693 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/base_solver.py:
--------------------------------------------------------------------------------
1 | import os
2 | from abc import ABC, abstractmethod
3 | from typing import Any, Dict, Generic, Iterable, Optional, TypeVar, Union
4 |
5 | from playwright.async_api import APIResponse as AsyncAPIResponse
6 | from playwright.async_api import Page as AsyncPage
7 | from playwright.async_api import Response as AsyncResponse
8 | from playwright.sync_api import APIResponse as SyncAPIResponse
9 | from playwright.sync_api import Page as SyncPage
10 | from playwright.sync_api import Response as SyncResponse
11 |
12 | from .recaptcha_box import RecaptchaBox
13 |
# Type variable constrained to the async or the sync Playwright page class.
PageT = TypeVar("PageT", AsyncPage, SyncPage)
# Aliases that accept either the async or the sync variant of a response class.
APIResponse = Union[AsyncAPIResponse, SyncAPIResponse]
Response = Union[AsyncResponse, SyncResponse]
17 |
18 |
class BaseSolver(ABC, Generic[PageT]):
    """
    Abstract foundation shared by the reCAPTCHA v2 solvers.

    Parameters
    ----------
    page : PageT
        The Playwright page on which the reCAPTCHA is solved.
    attempts : int, optional
        How many solve attempts to make, by default 5.
    capsolver_api_key : Optional[str], optional
        The CapSolver API key, by default None.
        When not provided, the `CAPSOLVER_API_KEY` environment variable is used.
    """

    def __init__(
        self, page: PageT, *, attempts: int = 5, capsolver_api_key: Optional[str] = None
    ) -> None:
        self._page = page
        self._attempts = attempts

        # An explicitly supplied key takes precedence over the environment.
        if capsolver_api_key:
            self._capsolver_api_key = capsolver_api_key
        else:
            self._capsolver_api_key = os.getenv("CAPSOLVER_API_KEY")

        self._token: Optional[str] = None
        self._payload_response: Union[APIResponse, Response, None] = None

        # Observe every response of the page through the subclass callback.
        self._page.on("response", self._response_callback)

    def __repr__(self) -> str:
        fields = ", ".join(
            (
                f"page={self._page!r}",
                f"attempts={self._attempts!r}",
                f"capsolver_api_key={self._capsolver_api_key!r}",
            )
        )
        return f"{self.__class__.__name__}({fields})"

    def close(self) -> None:
        """Detach the response listener that the constructor registered."""
        try:
            self._page.remove_listener("response", self._response_callback)
        except KeyError:
            # Presumably raised when the listener is no longer registered;
            # closing is best-effort, so the error is ignored.
            return

    @staticmethod
    @abstractmethod
    def _get_task_object(recaptcha_box: RecaptchaBox) -> Optional[str]:
        """
        Determine the ID of the object named in the image challenge task.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        Optional[str]
            The object ID, or None if the task object is not recognized.
        """

    @abstractmethod
    def _response_callback(self, response: Response) -> None:
        """
        Handle a page response to intercept payload and userverify responses.

        Parameters
        ----------
        response : Response
            The response received by the page.
        """

    @abstractmethod
    def _get_capsolver_response(
        self, recaptcha_box: RecaptchaBox, image_data: bytes
    ) -> Optional[Dict[str, Any]]:
        """
        Obtain the CapSolver JSON response for an image.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        image_data : bytes
            The image data.

        Returns
        -------
        Optional[Dict[str, Any]]
            The CapSolver JSON response,
            or None if the task object is not recognized.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _solve_tiles(self, recaptcha_box: RecaptchaBox, indexes: Iterable[int]) -> None:
        """
        Click the tiles that solve the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        indexes : Iterable[int]
            The indexes of the tiles that contain the task object.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """

    @abstractmethod
    def _transcribe_audio(self, audio_url: str, *, language: str) -> Optional[str]:
        """
        Convert the reCAPTCHA audio challenge into text.

        Parameters
        ----------
        audio_url : str
            The reCAPTCHA audio URL.
        language : str
            The language of the audio.

        Returns
        -------
        Optional[str]
            The reCAPTCHA audio text,
            or None if the audio could not be converted.
        """

    @abstractmethod
    def _click_checkbox(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Click the reCAPTCHA checkbox.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _get_audio_url(self, recaptcha_box: RecaptchaBox) -> str:
        """
        Retrieve the reCAPTCHA audio URL.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        str
            The reCAPTCHA audio URL.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_audio_text(self, recaptcha_box: RecaptchaBox, text: str) -> None:
        """
        Submit the text transcribed from the reCAPTCHA audio.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.
        text : str
            The reCAPTCHA audio text.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _submit_tile_answers(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Submit the selected tiles of the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_image_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA image challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def _solve_audio_challenge(self, recaptcha_box: RecaptchaBox) -> None:
        """
        Work through the reCAPTCHA audio challenge.

        Parameters
        ----------
        recaptcha_box : RecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """

    @abstractmethod
    def recaptcha_is_visible(self) -> bool:
        """
        Tell whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.

        Returns
        -------
        bool
            Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.
        """

    @abstractmethod
    def solve_recaptcha(
        self,
        *,
        attempts: Optional[int] = None,
        wait: bool = False,
        wait_timeout: float = 30,
        image_challenge: bool = False,
    ) -> str:
        """
        Solve the reCAPTCHA and return the `g-recaptcha-response` token.

        Parameters
        ----------
        attempts : Optional[int], optional
            The number of solve attempts, by default None. When None, the
            value given to the constructor (5 by default) is expected to be used.
        wait : bool, optional
            Whether to wait for the reCAPTCHA to appear, by default False.
        wait_timeout : float, optional
            The number of seconds to wait for the reCAPTCHA to appear,
            by default 30. Only used if `wait` is True.
        image_challenge : bool, optional
            Whether to solve the image challenge, by default False.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaNotFoundError
            If the reCAPTCHA was not found.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        RecaptchaSolveError
            If the reCAPTCHA could not be solved.
        """
306 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/recaptcha_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | from abc import ABC, abstractmethod
5 | from functools import wraps
6 | from typing import Generic, Iterable, List, Pattern, Tuple, TypeVar, Union
7 |
8 | from playwright.async_api import Frame as AsyncFrame
9 | from playwright.async_api import Locator as AsyncLocator
10 | from playwright.sync_api import Frame as SyncFrame
11 | from playwright.sync_api import Locator as SyncLocator
12 |
13 | from ..errors import RecaptchaNotFoundError
14 | from .translations import ELEMENT_TRANSLATIONS
15 |
# Type variable constrained to the async or the sync Playwright frame class.
FrameT = TypeVar("FrameT", AsyncFrame, SyncFrame)
# Alias that accepts either the async or the sync locator class.
Locator = Union[AsyncLocator, SyncLocator]
18 |
19 |
20 | class RecaptchaBox(ABC, Generic[FrameT]):
21 | """
22 | The base class for reCAPTCHA v2 boxes.
23 |
24 | Parameters
25 | ----------
26 | anchor_frame : FrameT
27 | The reCAPTCHA anchor frame.
28 | bframe_frame : FrameT
29 | The reCAPTCHA bframe frame.
30 | """
31 |
32 | def __init__(self, anchor_frame: FrameT, bframe_frame: FrameT) -> None:
33 | self._anchor_frame = anchor_frame
34 | self._bframe_frame = bframe_frame
35 |
36 | def __repr__(self) -> str:
37 | return (
38 | f"{self.__class__.__name__}(anchor_frame={self._anchor_frame!r}, "
39 | f"bframe_frame={self._bframe_frame!r})"
40 | )
41 |
42 | @staticmethod
43 | def _get_recaptcha_frame_pairs(
44 | frames: Iterable[FrameT],
45 | ) -> List[Tuple[FrameT, FrameT]]:
46 | """
47 | Get the reCAPTCHA anchor and bframe frame pairs.
48 |
49 | Parameters
50 | ----------
51 | frames : Iterable[FrameT]
52 | A list of frames to search for the reCAPTCHA anchor and bframe frames.
53 |
54 | Returns
55 | -------
56 | List[Tuple[FrameT, FrameT]]
57 | A list of reCAPTCHA anchor and bframe frame pairs.
58 |
59 | Raises
60 | ------
61 | RecaptchaNotFoundError
62 | If no reCAPTCHA anchor and bframe frame pairs were found.
63 | """
64 | anchor_frames = [
65 | frame
66 | for frame in frames
67 | if re.search("/recaptcha/(api2|enterprise)/anchor", frame.url) is not None
68 | ]
69 |
70 | bframe_frames = [
71 | frame
72 | for frame in frames
73 | if re.search("/recaptcha/(api2|enterprise)/bframe", frame.url) is not None
74 | ]
75 |
76 | frame_pairs = []
77 |
78 | for anchor_frame in anchor_frames:
79 | frame_id = anchor_frame.name[2:]
80 |
81 | for bframe_frame in bframe_frames:
82 | if frame_id not in bframe_frame.name:
83 | continue
84 |
85 | frame_pairs.append((anchor_frame, bframe_frame))
86 |
87 | if not frame_pairs:
88 | raise RecaptchaNotFoundError
89 |
90 | return frame_pairs
91 |
92 | @staticmethod
93 | def _get_translations_pattern(translations: Iterable[str]) -> Pattern:
94 | """
95 | Get a compiled regex pattern from a list of translations.
96 |
97 | Parameters
98 | ----------
99 | translations : Iterable[str]
100 | A list of translations to compile into a regex pattern.
101 |
102 | Returns
103 | -------
104 | Pattern
105 | The compiled regex pattern.
106 | """
107 | escaped_translations = [re.escape(translation) for translation in translations]
108 | return re.compile(f'^({"|".join(escaped_translations)}).?$')
109 |
110 | @property
111 | def checkbox(self) -> Locator:
112 | """The reCAPTCHA checkbox locator."""
113 | return self.anchor_frame.get_by_role(
114 | "checkbox",
115 | name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["im_not_a_robot"]),
116 | )
117 |
118 | @property
119 | def audio_challenge_button(self) -> Locator:
120 | """The reCAPTCHA audio challenge button locator."""
121 | return self.bframe_frame.get_by_role(
122 | "button",
123 | name=self._get_translations_pattern(
124 | ELEMENT_TRANSLATIONS["get_an_audio_challenge"]
125 | ),
126 | )
127 |
128 | @property
129 | def image_challenge_button(self) -> Locator:
130 | """The reCAPTCHA image challenge button locator."""
131 | return self.bframe_frame.get_by_role(
132 | "button",
133 | name=self._get_translations_pattern(
134 | ELEMENT_TRANSLATIONS["get_a_visual_challenge"]
135 | ),
136 | )
137 |
138 | @property
139 | def new_challenge_button(self) -> Locator:
140 | """The reCAPTCHA new challenge button locator."""
141 | return self.bframe_frame.get_by_role(
142 | "button",
143 | name=self._get_translations_pattern(
144 | ELEMENT_TRANSLATIONS["get_a_new_challenge"]
145 | ),
146 | )
147 |
148 | @property
149 | def audio_download_button(self) -> Locator:
150 | """The reCAPTCHA audio download button locator."""
151 | return self.bframe_frame.get_by_role(
152 | "link",
153 | name=self._get_translations_pattern(
154 | ELEMENT_TRANSLATIONS["alternatively_download_audio_as_mp3"]
155 | ),
156 | )
157 |
158 | @property
159 | def audio_challenge_textbox(self) -> Locator:
160 | """The reCAPTCHA audio challenge textbox locator."""
161 | return self.bframe_frame.get_by_role(
162 | "textbox",
163 | name=self._get_translations_pattern(
164 | ELEMENT_TRANSLATIONS["enter_what_you_hear"]
165 | ),
166 | )
167 |
168 | @property
169 | def skip_button(self) -> Locator:
170 | """The reCAPTCHA skip button locator."""
171 | return self.bframe_frame.get_by_role(
172 | "button", name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["skip"])
173 | )
174 |
175 | @property
176 | def next_button(self) -> Locator:
177 | """The reCAPTCHA next button locator."""
178 | return self.bframe_frame.get_by_role(
179 | "button", name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["next"])
180 | )
181 |
182 | @property
183 | def verify_button(self) -> Locator:
184 | """The reCAPTCHA verify button locator."""
185 | return self.bframe_frame.get_by_role(
186 | "button",
187 | name=self._get_translations_pattern(ELEMENT_TRANSLATIONS["verify"]),
188 | )
189 |
190 | @property
191 | def tile_selector(self) -> Locator:
192 | """The reCAPTCHA tile selector locator."""
193 | return self.bframe_frame.locator(".rc-imageselect-tile")
194 |
195 | @property
196 | def image_challenge(self) -> Locator:
197 | """The reCAPTCHA image challenge locator."""
198 | return self.bframe_frame.locator(".rc-imageselect-challenge")
199 |
200 | def frames_are_attached(self) -> bool:
201 | """
202 | Check if all of the reCAPTCHA frames are attached.
203 |
204 | Returns
205 | -------
206 | bool
207 | True if all of the reCAPTCHA frames are attached, False otherwise.
208 | """
209 | return not self.frames_are_detached()
210 |
211 | def frames_are_detached(self) -> bool:
212 | """
213 | Check if any of the reCAPTCHA frames are detached.
214 |
215 | Returns
216 | -------
217 | bool
218 | True if any of the reCAPTCHA frames are detached, False otherwise.
219 | """
220 | return self.anchor_frame.is_detached() or self.bframe_frame.is_detached()
221 |
222 | @abstractmethod
223 | def _check_if_attached(func):
224 | """
225 | A decorator for checking if the reCAPTCHA frames are attached
226 | before running the decorated function.
227 | """
228 |
229 | @property
230 | @abstractmethod
231 | def anchor_frame(self) -> FrameT:
232 | """The reCAPTCHA anchor frame."""
233 |
234 | @property
235 | @abstractmethod
236 | def bframe_frame(self) -> FrameT:
237 | """The reCAPTCHA bframe frame."""
238 |
239 | @classmethod
240 | @abstractmethod
241 | def from_frames(cls, frames: Iterable[FrameT]) -> RecaptchaBox:
242 | """
243 | Create a reCAPTCHA box using a list of frames.
244 |
245 | Parameters
246 | ----------
247 | frames : Iterable[FrameT]
248 | A list of frames to search for the reCAPTCHA frames.
249 |
250 | Returns
251 | -------
252 | RecaptchaBox
253 | The reCAPTCHA box.
254 |
255 | Raises
256 | ------
257 | RecaptchaNotFoundError
258 | If the reCAPTCHA frames were not found
259 | or if no unchecked reCAPTCHA boxes were found.
260 | """
261 |
262 | @abstractmethod
263 | def rate_limit_is_visible(self) -> bool:
264 | """
265 | Check if the reCAPTCHA rate limit message is visible.
266 |
267 | Returns
268 | -------
269 | bool
270 | True if the reCAPTCHA rate limit message is visible, False otherwise.
271 | """
272 |
273 | @abstractmethod
274 | def solve_failure_is_visible(self) -> bool:
275 | """
276 | Check if the reCAPTCHA solve failure message is visible.
277 |
278 | Returns
279 | -------
280 | bool
281 | True if the reCAPTCHA solve failure message is visible, False otherwise.
282 | """
283 |
284 | @abstractmethod
285 | def image_challenge_is_visible(self) -> bool:
286 | """
287 | Check if the reCAPTCHA image challenge is visible.
288 |
289 | Returns
290 | -------
291 | bool
292 | True if the reCAPTCHA challenge is visible, False otherwise.
293 | """
294 |
295 | @abstractmethod
296 | def audio_challenge_is_visible(self) -> bool:
297 | """
298 | Check if the reCAPTCHA audio challenge is visible.
299 |
300 | Returns
301 | -------
302 | bool
303 | True if the reCAPTCHA audio challenge is visible, False otherwise.
304 | """
305 |
306 | @abstractmethod
307 | def any_challenge_is_visible(self) -> bool:
308 | """
309 | Check if any reCAPTCHA challenge is visible.
310 |
311 | Returns
312 | -------
313 | bool
314 | True if any reCAPTCHA challenge is visible, False otherwise.
315 | """
316 |
317 | @abstractmethod
318 | def try_again_is_visible(self) -> bool:
319 | """
320 | Check if the reCAPTCHA try again message is visible.
321 |
322 | Returns
323 | -------
324 | bool
325 | True if the reCAPTCHA try again message is visible, False otherwise.
326 | """
327 |
328 | @abstractmethod
329 | def check_new_images_is_visible(self) -> bool:
330 | """
331 | Check if the reCAPTCHA check new images message is visible.
332 |
333 | Returns
334 | -------
335 | bool
336 | True if the reCAPTCHA check new images message is visible, False otherwise.
337 | """
338 |
@abstractmethod
def select_all_matching_is_visible(self) -> bool:
    """
    Report whether the reCAPTCHA select all matching images message is shown.

    Concrete subclasses provide the actual check.

    Returns
    -------
    bool
        True when the select all matching images message is visible,
        otherwise False.
    """
350 |
@abstractmethod
def challenge_is_solved(self) -> bool:
    """
    Report whether the reCAPTCHA challenge has been solved.

    Concrete subclasses provide the actual check.

    Returns
    -------
    bool
        True when the reCAPTCHA challenge has been solved, otherwise False.
    """
361 |
362 |
class SyncRecaptchaBox(RecaptchaBox[SyncFrame]):
    """
    The synchronous class for reCAPTCHA v2 boxes.

    The locators used below (``checkbox``, ``skip_button``, ``next_button``,
    ``verify_button``, ``new_challenge_button``, ``audio_challenge_button``,
    ``image_challenge_button``) and the helpers ``frames_are_attached``,
    ``frames_are_detached``, ``_get_recaptcha_frame_pairs`` and
    ``_get_translations_pattern`` are not defined in this class; they are
    expected to come from ``RecaptchaBox``.

    Parameters
    ----------
    anchor_frame : SyncFrame
        The reCAPTCHA anchor frame.
    bframe_frame : SyncFrame
        The reCAPTCHA bframe frame.
    """

    def _check_if_attached(func=None, /):
        """
        A decorator for checking if the reCAPTCHA frames are attached
        before running the decorated function.

        When the frames are detached, the decorated method is not called
        and False is returned instead. The decorator can be applied both
        as ``@_check_if_attached`` and as ``@_check_if_attached()``.
        """

        def wrap(func):
            @wraps(func)
            def wrapper(self: SyncRecaptchaBox, *args, **kwargs) -> bool:
                if self.frames_are_detached():
                    return False

                return func(self, *args, **kwargs)

            return wrapper

        # Called with parentheses: return the real decorator.
        if func is None:
            return wrap

        return wrap(func)

    @classmethod
    def from_frames(cls, frames: Iterable[SyncFrame]) -> SyncRecaptchaBox:
        """
        Create a reCAPTCHA box using a list of frames.

        The first frame pair whose frames are attached and that shows either
        an unchecked checkbox, an enabled audio challenge button, or an
        enabled image challenge button is used.

        Parameters
        ----------
        frames : Iterable[SyncFrame]
            A list of frames to search for the reCAPTCHA frames.

        Returns
        -------
        SyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaNotFoundError
            If the reCAPTCHA frames were not found
            or if no unchecked reCAPTCHA boxes were found.
        """
        frame_pairs = cls._get_recaptcha_frame_pairs(frames)

        for anchor_frame, bframe_frame in frame_pairs:
            recaptcha_box = cls(anchor_frame, bframe_frame)

            # The attachment check must guard every locator query below.
            # Written inline as ``attached and a or b or c`` it would only
            # guard the first clause, because ``and`` binds tighter than ``or``.
            if not recaptcha_box.frames_are_attached():
                continue

            if (
                (
                    recaptcha_box.checkbox.is_visible()
                    and not recaptcha_box.checkbox.is_checked()
                )
                or (
                    recaptcha_box.audio_challenge_button.is_visible()
                    and recaptcha_box.audio_challenge_button.is_enabled()
                )
                or (
                    recaptcha_box.image_challenge_button.is_visible()
                    and recaptcha_box.image_challenge_button.is_enabled()
                )
            ):
                return recaptcha_box

        raise RecaptchaNotFoundError("No unchecked reCAPTCHA boxes were found.")

    @property
    def anchor_frame(self) -> SyncFrame:
        """The reCAPTCHA anchor frame."""
        return self._anchor_frame

    @property
    def bframe_frame(self) -> SyncFrame:
        """The reCAPTCHA bframe frame."""
        return self._bframe_frame

    @_check_if_attached
    def rate_limit_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA rate limit message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA rate limit message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return self.bframe_frame.get_by_text(
            self._get_translations_pattern(ELEMENT_TRANSLATIONS["try_again_later"])
        ).is_visible()

    @_check_if_attached
    def solve_failure_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA solve failure message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA solve failure message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["multiple_correct_solutions_required"]
            )
        ).is_visible()

    @_check_if_attached
    def image_challenge_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA image challenge is visible.

        The check is whether the skip, next, or verify button is enabled.

        Returns
        -------
        bool
            True if the reCAPTCHA image challenge is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        button = self.skip_button.or_(self.next_button).or_(self.verify_button)
        return button.is_enabled()

    @_check_if_attached
    def audio_challenge_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA audio challenge is visible.

        The "press play to listen" text must be visible and the new challenge
        button must be both visible and enabled.

        Returns
        -------
        bool
            True if the reCAPTCHA audio challenge is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return (
            self.bframe_frame.get_by_text(
                self._get_translations_pattern(
                    ELEMENT_TRANSLATIONS["press_play_to_listen"]
                )
            ).is_visible()
            and self.new_challenge_button.is_visible()
            and self.new_challenge_button.is_enabled()
        )

    @_check_if_attached
    def any_challenge_is_visible(self) -> bool:
        """
        Check if any reCAPTCHA challenge is visible.

        Returns
        -------
        bool
            True if the image or audio reCAPTCHA challenge is visible,
            False otherwise. Also False if the reCAPTCHA frames are detached.
        """
        return self.image_challenge_is_visible() or self.audio_challenge_is_visible()

    @_check_if_attached
    def try_again_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA try again message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA try again message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return self.bframe_frame.get_by_text(
            self._get_translations_pattern(ELEMENT_TRANSLATIONS["please_try_again"])
        ).is_visible()

    @_check_if_attached
    def check_new_images_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA check new images message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA check new images message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["please_also_check_the_new_images"]
            )
        ).is_visible()

    @_check_if_attached
    def select_all_matching_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA select all matching images message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA select all matching images message is visible,
            False otherwise. Also False if the reCAPTCHA frames are detached.
        """
        return self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["please_select_all_matching_images"]
            )
        ).is_visible()

    @_check_if_attached
    def challenge_is_solved(self) -> bool:
        """
        Check if the reCAPTCHA challenge has been solved.

        Returns
        -------
        bool
            True if the reCAPTCHA checkbox is visible and checked, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return self.checkbox.is_visible() and self.checkbox.is_checked()
578 |
579 |
class AsyncRecaptchaBox(RecaptchaBox[AsyncFrame]):
    """
    The asynchronous class for reCAPTCHA v2 boxes.

    The locators used below (``checkbox``, ``skip_button``, ``next_button``,
    ``verify_button``, ``new_challenge_button``, ``audio_challenge_button``,
    ``image_challenge_button``) and the helpers ``frames_are_attached``,
    ``frames_are_detached``, ``_get_recaptcha_frame_pairs`` and
    ``_get_translations_pattern`` are not defined in this class; they are
    expected to come from ``RecaptchaBox``.

    Parameters
    ----------
    anchor_frame : AsyncFrame
        The reCAPTCHA anchor frame.
    bframe_frame : AsyncFrame
        The reCAPTCHA bframe frame.
    """

    def _check_if_attached(func=None, /):
        """
        A decorator for checking if the reCAPTCHA frames are attached
        before running the decorated function.

        When the frames are detached, the decorated coroutine function is not
        called and the wrapper resolves to False instead. The decorator can be
        applied both as ``@_check_if_attached`` and as ``@_check_if_attached()``.
        """

        def wrap(func):
            @wraps(func)
            async def wrapper(self: AsyncRecaptchaBox, *args, **kwargs) -> bool:
                if self.frames_are_detached():
                    return False

                return await func(self, *args, **kwargs)

            return wrapper

        # Called with parentheses: return the real decorator.
        if func is None:
            return wrap

        return wrap(func)

    @classmethod
    async def from_frames(cls, frames: Iterable[AsyncFrame]) -> AsyncRecaptchaBox:
        """
        Create a reCAPTCHA box using a list of frames.

        The first frame pair whose frames are attached and that shows either
        an unchecked checkbox, an enabled audio challenge button, or an
        enabled image challenge button is used.

        Parameters
        ----------
        frames : Iterable[AsyncFrame]
            A list of frames to search for the reCAPTCHA frames.

        Returns
        -------
        AsyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaNotFoundError
            If the reCAPTCHA frames were not found
            or if no unchecked reCAPTCHA boxes were found.
        """
        frame_pairs = cls._get_recaptcha_frame_pairs(frames)

        for anchor_frame, bframe_frame in frame_pairs:
            recaptcha_box = cls(anchor_frame, bframe_frame)

            # The attachment check must guard every locator query below.
            # Written inline as ``attached and a or b or c`` it would only
            # guard the first clause, because ``and`` binds tighter than ``or``.
            if not recaptcha_box.frames_are_attached():
                continue

            if (
                (
                    await recaptcha_box.checkbox.is_visible()
                    and not await recaptcha_box.checkbox.is_checked()
                )
                or (
                    await recaptcha_box.audio_challenge_button.is_visible()
                    and await recaptcha_box.audio_challenge_button.is_enabled()
                )
                or (
                    await recaptcha_box.image_challenge_button.is_visible()
                    and await recaptcha_box.image_challenge_button.is_enabled()
                )
            ):
                return recaptcha_box

        raise RecaptchaNotFoundError("No unchecked reCAPTCHA boxes were found.")

    @property
    def anchor_frame(self) -> AsyncFrame:
        """The reCAPTCHA anchor frame."""
        return self._anchor_frame

    @property
    def bframe_frame(self) -> AsyncFrame:
        """The reCAPTCHA bframe frame."""
        return self._bframe_frame

    @_check_if_attached
    async def rate_limit_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA rate limit message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA rate limit message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return await self.bframe_frame.get_by_text(
            self._get_translations_pattern(ELEMENT_TRANSLATIONS["try_again_later"])
        ).is_visible()

    @_check_if_attached
    async def solve_failure_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA solve failure message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA solve failure message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return await self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["multiple_correct_solutions_required"]
            )
        ).is_visible()

    @_check_if_attached
    async def image_challenge_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA image challenge is visible.

        The check is whether the skip, next, or verify button is enabled.

        Returns
        -------
        bool
            True if the reCAPTCHA image challenge is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        button = self.skip_button.or_(self.next_button).or_(self.verify_button)
        return await button.is_enabled()

    @_check_if_attached
    async def audio_challenge_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA audio challenge is visible.

        The "press play to listen" text must be visible and the new challenge
        button must be both visible and enabled.

        Returns
        -------
        bool
            True if the reCAPTCHA audio challenge is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return (
            await self.bframe_frame.get_by_text(
                self._get_translations_pattern(
                    ELEMENT_TRANSLATIONS["press_play_to_listen"]
                )
            ).is_visible()
            and await self.new_challenge_button.is_visible()
            and await self.new_challenge_button.is_enabled()
        )

    @_check_if_attached
    async def any_challenge_is_visible(self) -> bool:
        """
        Check if any reCAPTCHA challenge is visible.

        Returns
        -------
        bool
            True if the image or audio reCAPTCHA challenge is visible,
            False otherwise. Also False if the reCAPTCHA frames are detached.
        """
        return (
            await self.image_challenge_is_visible()
            or await self.audio_challenge_is_visible()
        )

    @_check_if_attached
    async def try_again_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA try again message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA try again message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return await self.bframe_frame.get_by_text(
            self._get_translations_pattern(ELEMENT_TRANSLATIONS["please_try_again"])
        ).is_visible()

    @_check_if_attached
    async def check_new_images_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA check new images message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA check new images message is visible, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return await self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["please_also_check_the_new_images"]
            )
        ).is_visible()

    @_check_if_attached
    async def select_all_matching_is_visible(self) -> bool:
        """
        Check if the reCAPTCHA select all matching images message is visible.

        Returns
        -------
        bool
            True if the reCAPTCHA select all matching images message is visible,
            False otherwise. Also False if the reCAPTCHA frames are detached.
        """
        return await self.bframe_frame.get_by_text(
            self._get_translations_pattern(
                ELEMENT_TRANSLATIONS["please_select_all_matching_images"]
            )
        ).is_visible()

    @_check_if_attached
    async def challenge_is_solved(self) -> bool:
        """
        Check if the reCAPTCHA challenge has been solved.

        Returns
        -------
        bool
            True if the reCAPTCHA checkbox is visible and checked, False otherwise.
            Also False if the reCAPTCHA frames are detached.
        """
        return await self.checkbox.is_visible() and await self.checkbox.is_checked()
798 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/sync_solver.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import base64
4 | import re
5 | import time
6 | from datetime import datetime
7 | from io import BytesIO
8 | from json import JSONDecodeError
9 | from typing import Any, Dict, List, Optional
10 | from urllib.parse import parse_qs, urlparse
11 |
12 | import speech_recognition
13 | from playwright.sync_api import Locator, Page, Response
14 | from pydub import AudioSegment
15 | from pydub.exceptions import CouldntDecodeError
16 | from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed
17 |
18 | from ..errors import (
19 | CapSolverError,
20 | RecaptchaNotFoundError,
21 | RecaptchaRateLimitError,
22 | RecaptchaSolveError,
23 | )
24 | from .base_solver import BaseSolver
25 | from .recaptcha_box import SyncRecaptchaBox
26 | from .translations import OBJECT_TRANSLATIONS, ORIGINAL_LANGUAGE_AUDIO
27 |
28 |
class SyncSolver(BaseSolver[Page]):
    """
    A class for solving reCAPTCHA v2 synchronously with Playwright.

    The attributes ``_page``, ``_attempts``, ``_capsolver_api_key``, ``_token``
    and ``_payload_response`` as well as ``close()`` are not defined in this
    class; they are expected to come from ``BaseSolver``.

    Parameters
    ----------
    page : Page
        The Playwright page to solve the reCAPTCHA on.
    attempts : int, optional
        The number of solve attempts, by default 5.
    capsolver_api_key : Optional[str], optional
        The CapSolver API key, by default None.
        If None, the `CAPSOLVER_API_KEY` environment variable will be used.
    """

    def __enter__(self) -> SyncSolver:
        """Return the solver itself so it can be used as a context manager."""
        return self

    def __exit__(self, *_: Any) -> None:
        """Close the solver when leaving the context manager."""
        self.close()

    @staticmethod
    def _get_task_object(recaptcha_box: SyncRecaptchaBox) -> Optional[str]:
        """
        Get the ID of the object in the reCAPTCHA image challenge task.

        The object name is read from the second line of the first ``div``
        text in the bframe and looked up in the known translations.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        Optional[str]
            The object ID. Returns None if the task object is not recognized
            or if the task text could not be read.
        """
        object_dict = {
            "/m/0pg52": OBJECT_TRANSLATIONS["taxis"],
            "/m/01bjv": OBJECT_TRANSLATIONS["bus"],
            "/m/04_sv": OBJECT_TRANSLATIONS["motorcycles"],
            "/m/013xlm": OBJECT_TRANSLATIONS["tractors"],
            "/m/01jk_4": OBJECT_TRANSLATIONS["chimneys"],
            "/m/014xcs": OBJECT_TRANSLATIONS["crosswalks"],
            "/m/015qff": OBJECT_TRANSLATIONS["traffic_lights"],
            "/m/0199g": OBJECT_TRANSLATIONS["bicycles"],
            "/m/015qbp": OBJECT_TRANSLATIONS["parking_meters"],
            "/m/0k4j": OBJECT_TRANSLATIONS["cars"],
            "/m/015kr": OBJECT_TRANSLATIONS["bridges"],
            "/m/019jd": OBJECT_TRANSLATIONS["boats"],
            "/m/0cdl1": OBJECT_TRANSLATIONS["palm_trees"],
            "/m/09d_r": OBJECT_TRANSLATIONS["mountains_or_hills"],
            "/m/01pns0": OBJECT_TRANSLATIONS["fire_hydrant"],
            "/m/01lynh": OBJECT_TRANSLATIONS["stairs"],
        }

        task = recaptcha_box.bframe_frame.locator("div").all_inner_texts()
        task_lines = task[0].split("\n") if task else []

        # Without a second line there is no object name to look up; treat it
        # like an unrecognized object instead of raising IndexError.
        if len(task_lines) < 2:
            return None

        object_ = task_lines[1]

        for object_id, translations in object_dict.items():
            if object_ in translations:
                return object_id

        return None

    def _response_callback(self, response: Response) -> None:
        """
        The callback for intercepting payload and userverify responses.

        A payload response is stored only if none is currently stored.
        A userverify response has its ``uvresp`` token extracted and stored.

        Parameters
        ----------
        response : Response
            The response.
        """
        if (
            re.search("/recaptcha/(api2|enterprise)/payload", response.url) is not None
            and self._payload_response is None
        ):
            self._payload_response = response
        elif (
            re.search("/recaptcha/(api2|enterprise)/userverify", response.url)
            is not None
        ):
            token_match = re.search('"uvresp","(.*?)"', response.text())

            if token_match is not None:
                self._token = token_match.group(1)

    def _get_capsolver_response(
        self, recaptcha_box: SyncRecaptchaBox, image_data: bytes
    ) -> Optional[Dict[str, Any]]:
        """
        Get the CapSolver JSON response for an image.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.
        image_data : bytes
            The image data.

        Returns
        -------
        Optional[Dict[str, Any]]
            The CapSolver JSON response.
            Returns None if the task object is not recognized.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error
            or if its response could not be decoded as JSON.
        """
        image = base64.b64encode(image_data).decode("utf-8")
        task_object = self._get_task_object(recaptcha_box)

        if task_object is None:
            return None

        payload = {
            "clientKey": self._capsolver_api_key,
            "task": {
                "type": "ReCaptchaV2Classification",
                "image": image,
                "question": task_object,
            },
        }

        response = self._page.request.post(
            "https://api.capsolver.com/createTask", data=payload
        )

        try:
            response_json = response.json()
        except JSONDecodeError as err:
            raise CapSolverError from err

        if response_json["errorId"] != 0:
            raise CapSolverError(response_json["errorDescription"])

        return response_json

    def _solve_tiles(self, recaptcha_box: SyncRecaptchaBox, indexes: List[int]) -> None:
        """
        Solve the tiles in the reCAPTCHA image challenge.

        Each given tile is clicked. Tiles that are replaced by a new image
        after being clicked are then polled for up to 60 seconds: every new
        image is classified with CapSolver and clicked again if it contains
        the task object.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.
        indexes : List[int]
            The indexes of the tiles that contain the task object.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        """
        # Maps each changing tile to the last image URL seen for it.
        changing_tiles: Dict[Locator, str] = {}

        # Resets the tile's inline style and class name.
        style_script = """
        (element) => {
            element.style = "";
            element.className = "rc-imageselect-tile";
        }
        """

        for index in indexes:
            tile = recaptcha_box.tile_selector.nth(index)
            tile.click()

            # get_attribute() returns None when the attribute is missing.
            tile_class = tile.get_attribute("class") or ""

            if "rc-imageselect-dynamic-selected" not in tile_class:
                continue

            changing_tiles[tile] = tile.locator("img").get_attribute("src")
            tile.evaluate(style_script)

        # A monotonic clock keeps the timeout unaffected by system clock changes.
        deadline = time.monotonic() + 60

        while changing_tiles and time.monotonic() < deadline:
            # Iterate over a snapshot because tiles are removed inside the loop.
            for tile in tuple(changing_tiles):
                image_url = tile.locator("img").get_attribute("src")

                # The replacement image has not been loaded yet.
                if changing_tiles[tile] == image_url:
                    continue

                changing_tiles[tile] = image_url
                response = self._page.request.get(image_url)

                capsolver_response = self._get_capsolver_response(
                    recaptcha_box, response.body()
                )

                if (
                    capsolver_response is None
                    or not capsolver_response["solution"]["hasObject"]
                ):
                    changing_tiles.pop(tile)
                    continue

                tile.click()
                tile.evaluate(style_script)

    def _transcribe_audio(
        self, audio_url: str, *, language: str = "en-US"
    ) -> Optional[str]:
        """
        Transcribe the reCAPTCHA audio challenge.

        The audio is downloaded, converted from MP3 to WAV in memory, and
        transcribed with ``Recognizer.recognize_google``.

        Parameters
        ----------
        audio_url : str
            The reCAPTCHA audio URL.
        language : str, optional
            The language of the audio, by default en-US.

        Returns
        -------
        Optional[str]
            The reCAPTCHA audio text.
            Returns None if the audio could not be converted
            or if the speech could not be recognized.
        """
        response = self._page.request.get(audio_url)

        wav_audio = BytesIO()
        mp3_audio = BytesIO(response.body())

        try:
            audio: AudioSegment = AudioSegment.from_mp3(mp3_audio)
        except CouldntDecodeError:
            return None

        audio.export(wav_audio, format="wav")
        recognizer = speech_recognition.Recognizer()

        with speech_recognition.AudioFile(wav_audio) as source:
            audio_data = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio_data, language=language)
        except speech_recognition.UnknownValueError:
            return None

    def _click_checkbox(self, recaptcha_box: SyncRecaptchaBox) -> None:
        """
        Click the reCAPTCHA checkbox.

        After the click, this waits until a challenge is visible, a token
        has been captured, or the reCAPTCHA frames are detached.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        recaptcha_box.checkbox.click()

        while recaptcha_box.frames_are_attached() and self._token is None:
            if recaptcha_box.rate_limit_is_visible():
                raise RecaptchaRateLimitError

            if recaptcha_box.any_challenge_is_visible():
                return

            self._page.wait_for_timeout(250)

    def _get_audio_url(self, recaptcha_box: SyncRecaptchaBox) -> str:
        """
        Get the reCAPTCHA audio URL.

        This polls every 250 ms until the audio challenge is visible.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Returns
        -------
        str
            The reCAPTCHA audio URL.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        # NOTE(review): this loop has no exit condition of its own; it ends
        # only when the rate limit or the audio challenge becomes visible.
        while True:
            if recaptcha_box.rate_limit_is_visible():
                raise RecaptchaRateLimitError

            if recaptcha_box.audio_challenge_is_visible():
                return recaptcha_box.audio_download_button.get_attribute("href")

            self._page.wait_for_timeout(250)

    def _submit_audio_text(self, recaptcha_box: SyncRecaptchaBox, text: str) -> None:
        """
        Submit the reCAPTCHA audio text.

        After the userverify response arrives, this waits until the audio
        challenge is gone, the solve failure message is visible, the challenge
        is solved, or the reCAPTCHA frames are detached.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.
        text : str
            The reCAPTCHA audio text.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        recaptcha_box.audio_challenge_textbox.fill(text)

        with self._page.expect_response(
            re.compile("/recaptcha/(api2|enterprise)/userverify")
        ):
            recaptcha_box.verify_button.click()

        while recaptcha_box.frames_are_attached():
            if recaptcha_box.rate_limit_is_visible():
                raise RecaptchaRateLimitError

            if (
                not recaptcha_box.audio_challenge_is_visible()
                or recaptcha_box.solve_failure_is_visible()
                or recaptcha_box.challenge_is_solved()
            ):
                return

            self._page.wait_for_timeout(250)

    def _submit_tile_answers(self, recaptcha_box: SyncRecaptchaBox) -> None:
        """
        Submit the reCAPTCHA image challenge tile answers.

        After clicking verify, this waits until the challenge is solved, the
        try again message is visible, or the reCAPTCHA frames are detached.
        If the "check new images" or "select all matching images" message is
        visible, a new challenge is requested before returning.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        recaptcha_box.verify_button.click()

        while recaptcha_box.frames_are_attached():
            if recaptcha_box.rate_limit_is_visible():
                raise RecaptchaRateLimitError

            if (
                recaptcha_box.challenge_is_solved()
                or recaptcha_box.try_again_is_visible()
            ):
                return

            if (
                recaptcha_box.check_new_images_is_visible()
                or recaptcha_box.select_all_matching_is_visible()
            ):
                with self._page.expect_response(
                    re.compile("/recaptcha/(api2|enterprise)/payload")
                ):
                    recaptcha_box.new_challenge_button.click()

                return

            self._page.wait_for_timeout(250)

    def _solve_image_challenge(self, recaptcha_box: SyncRecaptchaBox) -> None:
        """
        Solve the reCAPTCHA image challenge.

        The stored payload image is classified with CapSolver. If no objects
        are reported, a new challenge is requested and the loop starts over.
        Otherwise the reported tiles are solved and the answers are submitted,
        or the skip/next button is clicked when one is shown.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        while recaptcha_box.frames_are_attached():
            capsolver_response = self._get_capsolver_response(
                recaptcha_box, self._payload_response.body()
            )

            if (
                capsolver_response is None
                or not capsolver_response["solution"]["objects"]
            ):
                # Clear the stored payload so the next one can be captured.
                self._payload_response = None

                with self._page.expect_response(
                    re.compile("/recaptcha/(api2|enterprise)/reload")
                ):
                    recaptcha_box.new_challenge_button.click()

                while self._payload_response is None:
                    if recaptcha_box.rate_limit_is_visible():
                        raise RecaptchaRateLimitError

                    self._page.wait_for_timeout(250)

                continue

            self._solve_tiles(recaptcha_box, capsolver_response["solution"]["objects"])
            self._payload_response = None

            button = recaptcha_box.skip_button.or_(recaptcha_box.next_button)

            # No skip/next button: the answers are submitted with verify.
            if button.is_hidden():
                self._submit_tile_answers(recaptcha_box)
                return

            with self._page.expect_response(
                re.compile("/recaptcha/(api2|enterprise)/payload")
            ):
                button.click()

    def _solve_audio_challenge(self, recaptcha_box: SyncRecaptchaBox) -> None:
        """
        Solve the reCAPTCHA audio challenge.

        The audio language is taken from the ``hl`` query parameter of the
        anchor frame URL. If it is missing or not in ``ORIGINAL_LANGUAGE_AUDIO``,
        ``en-US`` is used. New audio challenges are requested until one can be
        transcribed, and the transcription is then submitted.

        Parameters
        ----------
        recaptcha_box : SyncRecaptchaBox
            The reCAPTCHA box.

        Raises
        ------
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        """
        parsed_url = urlparse(recaptcha_box.anchor_frame.url)
        query_params = parse_qs(parsed_url.query)

        # Fall back to en-US instead of raising KeyError when `hl` is absent.
        language = query_params.get("hl", ["en-US"])[0]

        if language not in ORIGINAL_LANGUAGE_AUDIO:
            language = "en-US"

        while True:
            url = self._get_audio_url(recaptcha_box)
            text = self._transcribe_audio(url, language=language)

            if text is not None:
                break

            with self._page.expect_response(
                re.compile("/recaptcha/(api2|enterprise)/reload")
            ):
                recaptcha_box.new_challenge_button.click()

            # Wait for the audio URL to change before transcribing again.
            while url == self._get_audio_url(recaptcha_box):
                self._page.wait_for_timeout(250)

        self._submit_audio_text(recaptcha_box, text)

    def recaptcha_is_visible(self) -> bool:
        """
        Check if a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.

        Returns
        -------
        bool
            Whether a reCAPTCHA challenge or unchecked reCAPTCHA box is visible.
        """
        try:
            SyncRecaptchaBox.from_frames(self._page.frames)
        except RecaptchaNotFoundError:
            return False

        return True

    def solve_recaptcha(
        self,
        *,
        attempts: Optional[int] = None,
        wait: bool = False,
        wait_timeout: float = 30,
        image_challenge: bool = False,
    ) -> str:
        """
        Solve the reCAPTCHA and return the `g-recaptcha-response` token.

        Parameters
        ----------
        attempts : Optional[int], optional
            The number of solve attempts, by default None.
            If None or 0, the number of attempts set on the solver is used.
        wait : bool, optional
            Whether to wait for the reCAPTCHA to appear, by default False.
        wait_timeout : float, optional
            The amount of time in seconds to wait for the reCAPTCHA to appear,
            by default 30. Only used if `wait` is True.
        image_challenge : bool, optional
            Whether to solve the image challenge, by default False.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        CapSolverError
            If the CapSolver API returned an error
            or if `image_challenge` is True and no CapSolver API key is set.
        RecaptchaNotFoundError
            If the reCAPTCHA was not found.
        RecaptchaRateLimitError
            If the reCAPTCHA rate limit has been exceeded.
        RecaptchaSolveError
            If the reCAPTCHA could not be solved.
        """
        if image_challenge and self._capsolver_api_key is None:
            raise CapSolverError(
                "You must provide a CapSolver API key to solve image challenges."
            )

        self._token = None
        attempts = attempts or self._attempts

        if wait:
            retry = Retrying(
                # tenacity passes the delay in seconds, while Playwright's
                # wait_for_timeout() expects milliseconds.
                sleep=lambda seconds: self._page.wait_for_timeout(seconds * 1000),
                stop=stop_after_delay(wait_timeout),
                wait=wait_fixed(0.25),
                retry=retry_if_exception_type(RecaptchaNotFoundError),
                reraise=True,
            )

            recaptcha_box = retry(
                lambda: SyncRecaptchaBox.from_frames(self._page.frames)
            )
        else:
            recaptcha_box = SyncRecaptchaBox.from_frames(self._page.frames)

        if recaptcha_box.rate_limit_is_visible():
            raise RecaptchaRateLimitError

        if recaptcha_box.checkbox.is_visible():
            click_timestamp = time.time()
            self._click_checkbox(recaptcha_box)

            if self._token is not None:
                return self._token

            if (
                recaptcha_box.frames_are_detached()
                or not recaptcha_box.any_challenge_is_visible()
                or recaptcha_box.challenge_is_solved()
            ):
                # No challenge to solve: wait for the response callback
                # to capture the token.
                while self._token is None:
                    self._page.wait_for_timeout(250)

                return self._token

            # Make sure at least one second has passed since the click.
            time_to_wait = max(1 - (time.time() - click_timestamp), 0)
            self._page.wait_for_timeout(time_to_wait * 1000)

        while not recaptcha_box.any_challenge_is_visible():
            self._page.wait_for_timeout(250)

        # Switch to the requested challenge type if its button is shown.
        if image_challenge and recaptcha_box.image_challenge_button.is_visible():
            recaptcha_box.image_challenge_button.click()
        elif not image_challenge and recaptcha_box.audio_challenge_button.is_visible():
            recaptcha_box.audio_challenge_button.click()

        if image_challenge:
            # Fetch the challenge image directly as the first payload.
            image = recaptcha_box.image_challenge.locator("img").first
            image_url = image.get_attribute("src")
            self._payload_response = self._page.request.get(image_url)

        while attempts > 0:
            self._token = None

            if image_challenge:
                self._solve_image_challenge(recaptcha_box)
            else:
                self._solve_audio_challenge(recaptcha_box)

            if (
                recaptcha_box.frames_are_detached()
                or not recaptcha_box.any_challenge_is_visible()
                or recaptcha_box.challenge_is_solved()
            ):
                while self._token is None:
                    self._page.wait_for_timeout(250)

                return self._token

            attempts -= 1

        raise RecaptchaSolveError
626 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav2/translations.py:
--------------------------------------------------------------------------------
# Known renderings of the reCAPTCHA widget's UI text, keyed by a snake_case
# identifier derived from the English phrase.
#
# Every tuple starts with the English string; the remaining entries are the
# same phrase in Russian, Chinese, Spanish, French, German, Dutch, Italian and
# Portuguese (in that order for the entries below).
#
# NOTE(review): the code that consumes this table is not visible here;
# presumably the strings are matched against the text the widget renders, so
# they should be treated as exact runtime values — confirm before editing.
ELEMENT_TRANSLATIONS = {
    "im_not_a_robot": (
        "I'm not a robot",
        "Я не робот",
        "进行人机身份验证",
        "No soy un robot",
        "Je ne suis pas un robot",
        "Ich bin kein Roboter",
        "Ik ben geen robot",
        "Non sono un robot",
        "Não sou um robô",
    ),
    "get_an_audio_challenge": (
        "Get an audio challenge",
        "Пройти аудиотест",
        "改用音频验证",
        "Obtener una pista sonora",
        "Générer un test audio",
        "Audio-Captcha abrufen",
        "Een audio-uitdaging ophalen",
        "Verifica audio",
        "Receber um desafio de áudio",
    ),
    "get_a_visual_challenge": (
        "Get a visual challenge",
        "Пройти визуальный тест",
        "改用图片验证",
        "Obtener una pista visual",
        "Générer un test visuel",
        "Visuelles Captcha abrufen",
        "Een visuele uitdaging ophalen",
        "Verifica visiva",
        "Receber um desafio visual",
    ),
    "get_a_new_challenge": (
        "Get a new challenge",
        "Обновить",
        "换一个新的验证码",
        "Obtener una pista nueva",
        "Générer un nouveau test",
        "Neues Captcha abrufen",
        "Een nieuwe uitdaging ophalen",
        "Nuova verifica",
        "Receber outro desafio",
    ),
    "alternatively_download_audio_as_mp3": (
        "Alternatively, download audio as MP3",
        "Скачать MP3-файл",
        "或者以 MP3 格式下载音频",
        "También puedes descargar el audio en formato MP3",
        "Ou téléchargez le fichier audio au format MP3",
        "Audio als MP3 herunterladen",
        "Of download het geluid als MP3-bestand",
        "In alternativa, scarica l'audio come MP3",
        "Como alternativa, faça o download do áudio como MP3",
    ),
    "enter_what_you_hear": (
        "Enter what you hear",
        "Введите прозвучавшие слова",
        "请输入您听到的内容",
        "Escribe lo que escuches",
        "Saisissez ce que vous entendez",
        "Geben Sie ein, was Sie hören",
        "Geef op wat je hoort",
        "Inserisci quello che senti",
        "Digite o que você ouve",
    ),
    "skip": (
        "Skip",
        "Пропустить",
        "跳过",
        "Saltar",
        "Ignorer",
        "Überspringen",
        "Overslaan",
        "Salta",
        "Pular",
    ),
    "next": (
        "Next",
        "Далее",
        "下一个",
        "Siguiente",
        "Suivant",
        "Weiter",
        "Volgende",
        "Avanti",
        "Avançar",
    ),
    "verify": (
        "Verify",
        "Подтвердить",
        "验证",
        "Verificar",
        "Valider",
        "Bestätigen",
        "Verifiëren",
        "Verifica",
        "Verificar",
    ),
    "try_again_later": (
        "Try again later",
        "Повторите попытку позже",
        "稍后重试",
        "Inténtalo de nuevo más tarde",
        "Réessayez plus tard",
        "Später noch einmal versuchen",
        "Probeer het later opnieuw",
        "Riprova più tardi",
        "Tente novamente mais tarde",
    ),
    "multiple_correct_solutions_required": (
        "Multiple correct solutions required - please solve more",
        "Вы должны выполнить несколько заданий",
        "需要提供多个正确答案 - 请回答更多问题",
        "Debes resolver más captchas",
        "Veuillez effectuer d'autres tests (vous devez fournir plusieurs solutions correctes)",
        "Es sind mehrere richtige Lösungen erforderlich. Bitte weitere Aufgaben lösen",
        "Er zijn meerdere juiste oplossingen vereist - geef meer oplossingen op",
        "È necessario fornire più soluzioni corrette. Risolvi altri captcha",
        "São necessárias várias soluções corretas. Solucione mais",
    ),
    "press_play_to_listen": (
        "Press PLAY to listen",
        'Чтобы прослушать, нажмите "Воспроизвести"',
        "按“播放”可听语音内容",
        "Pulsa REPRODUCIR para escuchar el audio",
        "Appuyez sur LECTURE pour écouter",
        "Wählen Sie WIEDERGABE aus, um die Wiedergabe zu starten",
        "Druk op AFSPELEN om te luisteren",
        "Premi RIPRODUCI per ascoltare",
        "Pressione REPRODUZIR para ouvir",
    ),
    "please_try_again": (
        "Please try again",
        "Повторите попытку",
        "请重试",
        "Inténtalo de nuevo",
        "Veuillez réessayer",
        "Versuche es bitte erneut",
        "Probeer het opnieuw",
        "Riprova",
        "Tente novamente",
    ),
    "please_also_check_the_new_images": (
        "Please also check the new images",
        "Просмотрите также новые изображение",
        "另外,您还需查看新显示的图片",
        "Comprueba también las imágenes nuevas",
        "Veuillez également vérifier les nouvelles images",
        "Sehen Sie sich auch die neuen Bilder an",
        "Controleer ook de nieuwe afbeeldingen",
        "Controlla anche le nuove immagini",
        "Verifique também as novas imagens",
    ),
    "please_select_all_matching_images": (
        "Please select all matching images",
        "Выберите все совпадающие изображения",
        "请选择所有相符的图片",
        "Selecciona todas las imágenes que coincidan",
        "Veuillez sélectionner toutes les images correspondantes",
        "Wählen Sie alle passenden Bilder aus",
        "Selecteer alle overeenkomende afbeeldingen",
        "Seleziona tutte le immagini corrispondenti",
        "Selecione todas as imagens correspondentes",
    ),
}
168 |
# Names of the objects an image challenge can ask for, keyed by a snake_case
# English identifier.
#
# Each tuple lists the English wording first, followed by the wording in other
# languages. Tuple lengths differ: some objects carry singular/plural or
# alternative spellings for the same language, and a few have no entry for
# every language.
#
# NOTE(review): the lookup code is not visible here; presumably these strings
# are compared with the challenge prompt exactly as rendered (including case),
# so do not normalize them without checking the caller.
OBJECT_TRANSLATIONS = {
    "taxis": ("taxis", "такси", "出租车", "Taxis", "taxi's", "taxi", "táxis"),
    "bus": (
        "bus",
        "buses",
        "автобус",
        "автобусы",
        "公交车",
        "autobuses",
        "autobús",
        "Bus",
        "Bussen",
        "bussen",
        "autobus",
        "ônibus",
    ),
    "motorcycles": (
        "motorcycles",
        "мотоциклы",
        "摩托车",
        "motocicletas",
        "motos",
        "Motorrädern",
        "motorfietsen",
        "motoren",
        "motocicli",
    ),
    "tractors": (
        "tractors",
        "трактора",
        "拖拉机",
        "tractores",
        "tracteurs",
        "Traktoren",
        "tractoren",
        "trattori",
        "tratores",
    ),
    "chimneys": (
        "chimneys",
        "дымовые трубы",
        "烟囱",
        "chimeneas",
        "cheminées",
        "Schornsteinen",
        "schoorstenen",
        "camini",
        "chaminés",
    ),
    "crosswalks": (
        "crosswalks",
        "пешеходные переходы",
        "人行横道",
        "过街人行道",
        "pasos de peatones",
        "passages pour piétons",
        "Fußgängerüberwegen",
        "oversteekplaatsen",
        "zebrapaden",
        "strisce pedonali",
        "faixas de pedestres",
        "faixas de pedestre",
    ),
    "traffic_lights": (
        "traffic lights",
        "светофоры",
        "红绿灯",
        "semáforos",
        "feux de circulation",
        "Ampeln",
        "verkeerslichten",
        "semafori",
    ),
    "bicycles": (
        "bicycles",
        "велосипеды",
        "自行车",
        "bicicletas",
        "vélos",
        "Fahrrädern",
        "fietsen",
        "biciclette",
    ),
    "parking_meters": (
        "parking meters",
        "парковочные автоматы",
        "停车计时器",
        "parquímetros",
        "parcmètres",
        "Parkometern",
        "parkeermeters",
        "parchimetri",
    ),
    "cars": (
        "cars",
        "автомобили",
        "小轿车",
        "coches",
        "voitures",
        "Pkws",
        "auto's",
        "auto",
        "carros",
    ),
    "bridges": (
        "bridges",
        "мостами",
        "桥",
        "puentes",
        "ponts",
        "Brücken",
        "bruggen",
        "ponti",
        "pontes",
    ),
    "boats": ("boats", "лодки", "船", "barcos", "bateaux", "Boote", "boten", "barche"),
    "palm_trees": (
        "palm trees",
        "пальмы",
        "棕榈树",
        "palmeras",
        "palmiers",
        "Palmen",
        "palmbomen",
        "palme",
        "palmeiras",
    ),
    "mountains_or_hills": (
        "mountains or hills",
        "mountain",
        "горы или холмы",
        "montañas o colinas",
        "montagnes ou collines",
        "Berge oder Hügel",
        "bergen of heuvels",
        "montagne o colline",
        "montanhas ou colinas",
    ),
    "fire_hydrant": (
        "a fire hydrant",
        "fire hydrants",
        "гидрантами",
        "пожарные гидранты",
        "消防栓",
        "bocas de incendios",
        "una boca de incendios",
        "borne d'incendie",
        "bouches d'incendie",
        "Hydranten",
        "Feuerhydranten",
        "een brandkraan",
        "brandkranen",
        "idrante",
        "idranti",
        "um hidrante",
        "hidrantes",
    ),
    "stairs": (
        "stairs",
        "лестницы",
        "楼梯",
        "escaleras",
        "escaliers",
        "Treppen(stufen)",
        "trappen",
        "scale",
        "escadas",
    ),
}
338 |
# Two-letter language codes: German, Spanish, French, Italian, Dutch, Portuguese.
# NOTE(review): presumably the languages for which the audio challenge is
# spoken in that language rather than in English — confirm against the solver
# code that reads this tuple.
ORIGINAL_LANGUAGE_AUDIO = ("de", "es", "fr", "it", "nl", "pt")
340 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav3/__init__.py:
--------------------------------------------------------------------------------
1 | """reCAPTCHA v3 solver for Playwright."""
2 | from .async_solver import AsyncSolver
3 | from .sync_solver import SyncSolver
4 |
# Public API of the recaptchav3 package: the two solver classes imported above.
__all__ = ["AsyncSolver", "SyncSolver"]
6 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav3/async_solver.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import time
5 | from typing import Any, Optional
6 |
7 | from playwright.async_api import Page, Response
8 |
9 | from ..errors import RecaptchaTimeoutError
10 | from .base_solver import BaseSolver
11 |
12 |
class AsyncSolver(BaseSolver[Page]):
    """
    A class for solving reCAPTCHA v3 asynchronously with Playwright.

    Parameters
    ----------
    page : Page
        The Playwright page to solve the reCAPTCHA on.
    timeout : float, optional
        The solve timeout in seconds, by default 30.
    """

    async def __aenter__(self) -> AsyncSolver:
        return self

    async def __aexit__(self, *_: Any) -> None:
        # Detach the response listener when leaving the ``async with`` block.
        self.close()

    async def _response_callback(self, response: Response) -> None:
        """
        The callback for intercepting reload responses.

        Stores the token found in a reCAPTCHA reload response on the solver;
        every other response is ignored.

        Parameters
        ----------
        response : Response
            The response.
        """
        if re.search(r"/recaptcha/(api2|enterprise)/reload", response.url) is None:
            return

        token_match = re.search(r'"rresp","(.*?)"', await response.text())

        if token_match is not None:
            self._token = token_match.group(1)

    async def solve_recaptcha(self, timeout: Optional[float] = None) -> str:
        """
        Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token.

        Parameters
        ----------
        timeout : Optional[float], optional
            The solve timeout in seconds. When omitted (or falsy), the timeout
            given to the constructor is used.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        RecaptchaTimeoutError
            If the solve timeout has been exceeded.
        """
        # Discard any token captured earlier; only one intercepted from now on counts.
        self._token = None
        timeout = timeout or self._timeout

        # Use the monotonic clock so wall-clock adjustments cannot shorten or
        # stretch the timeout.
        start_time = time.monotonic()

        while self._token is None:
            if time.monotonic() - start_time >= timeout:
                raise RecaptchaTimeoutError

            # Yield to Playwright so the response callback can run.
            await self._page.wait_for_timeout(250)

        return self._token
78 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav3/base_solver.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Generic, Optional, TypeVar, Union
3 |
4 | from playwright.async_api import Page as AsyncPage
5 | from playwright.async_api import Response as AsyncResponse
6 | from playwright.sync_api import Page as SyncPage
7 | from playwright.sync_api import Response as SyncResponse
8 |
# Type variable constrained to the async or the sync Playwright page class.
PageT = TypeVar("PageT", AsyncPage, SyncPage)
# A response coming from either the async or the sync Playwright API.
Response = Union[AsyncResponse, SyncResponse]
11 |
12 |
class BaseSolver(ABC, Generic[PageT]):
    """
    Abstract foundation shared by the reCAPTCHA v3 solvers.

    Parameters
    ----------
    page : PageT
        The Playwright page on which the reCAPTCHA is solved.
    timeout : float, optional
        The solve timeout in seconds, by default 30.
    """

    def __init__(self, page: PageT, timeout: float = 30) -> None:
        self._token: Optional[str] = None
        self._timeout = timeout
        self._page = page

        # Subscribe immediately so responses are observed from construction on.
        page.on("response", self._response_callback)

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(page={self._page!r}, timeout={self._timeout!r})"

    def close(self) -> None:
        """Detach the reload response listener from the page."""
        try:
            self._page.remove_listener("response", self._response_callback)
        except KeyError:
            # A KeyError from the removal is deliberately ignored.
            return

    @abstractmethod
    def _response_callback(self, response: Response) -> None:
        """
        Handle a page response; subclasses intercept reload responses here.

        Parameters
        ----------
        response : Response
            The response.
        """

    @abstractmethod
    def solve_recaptcha(self, timeout: Optional[float] = None) -> str:
        """
        Block until the reCAPTCHA is solved, then hand back the
        `g-recaptcha-response` token.

        Parameters
        ----------
        timeout : Optional[float], optional
            The solve timeout in seconds, by default 30.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        RecaptchaTimeoutError
            If the solve timeout has been exceeded.
        """
76 |
--------------------------------------------------------------------------------
/playwright_recaptcha/recaptchav3/sync_solver.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | import time
5 | from typing import Any, Optional
6 |
7 | from playwright.sync_api import Page, Response
8 |
9 | from ..errors import RecaptchaTimeoutError
10 | from .base_solver import BaseSolver
11 |
12 |
class SyncSolver(BaseSolver[Page]):
    """
    A class for solving reCAPTCHA v3 synchronously with Playwright.

    Parameters
    ----------
    page : Page
        The Playwright page to solve the reCAPTCHA on.
    timeout : float, optional
        The solve timeout in seconds, by default 30.
    """

    def __enter__(self) -> SyncSolver:
        return self

    def __exit__(self, *_: Any) -> None:
        # Detach the response listener when leaving the ``with`` block.
        self.close()

    def _response_callback(self, response: Response) -> None:
        """
        The callback for intercepting reload responses.

        Stores the token found in a reCAPTCHA reload response on the solver;
        every other response is ignored.

        Parameters
        ----------
        response : Response
            The response.
        """
        if re.search(r"/recaptcha/(api2|enterprise)/reload", response.url) is None:
            return

        token_match = re.search(r'"rresp","(.*?)"', response.text())

        if token_match is not None:
            self._token = token_match.group(1)

    def solve_recaptcha(self, timeout: Optional[float] = None) -> str:
        """
        Wait for the reCAPTCHA to be solved and return the `g-recaptcha-response` token.

        Parameters
        ----------
        timeout : Optional[float], optional
            The solve timeout in seconds. When omitted (or falsy), the timeout
            given to the constructor is used.

        Returns
        -------
        str
            The `g-recaptcha-response` token.

        Raises
        ------
        RecaptchaTimeoutError
            If the solve timeout has been exceeded.
        """
        # Discard any token captured earlier; only one intercepted from now on counts.
        self._token = None
        timeout = timeout or self._timeout

        # Use the monotonic clock so wall-clock adjustments cannot shorten or
        # stretch the timeout.
        start_time = time.monotonic()

        while self._token is None:
            if time.monotonic() - start_time >= timeout:
                raise RecaptchaTimeoutError

            # Hand control to Playwright so the response callback can run.
            self._page.wait_for_timeout(250)

        return self._token
78 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | asyncio_default_fixture_loop_scope = function
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | playwright>=1.33.0,!=1.50.0
2 | pydub==0.25.1
3 | pytest-asyncio==0.24.0; python_version == "3.8"
4 | pytest-asyncio==0.26.0; python_version >= "3.9"
5 | setuptools==75.3.0; python_version == "3.8"
6 | setuptools==80.9.0; python_version >= "3.9"
7 | SpeechRecognition==3.10.4; python_version == "3.8"
8 | SpeechRecognition==3.14.3; python_version >= "3.9"
9 | tenacity==9.1.2
10 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
# The README doubles as the long description passed to setup() below.
with open("README.md", encoding="utf-8") as file:
    long_description = file.read()

setup(
    name="playwright-recaptcha",
    version="0.5.1",
    author="Xewdy444",
    author_email="xewdy@xewdy.systems",
    description="A library for solving reCAPTCHA v2 and v3 with Playwright",
    license="MIT",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/Xewdy444/Playwright-reCAPTCHA",
    packages=find_packages(),
    python_requires=">=3.8",
    install_requires=[
        "playwright>=1.33.0,!=1.50.0",
        "pydub==0.25.1",
        # The Python 3.8 pin mirrors requirements.txt. Pinning 3.14.3 under
        # both markers made the version split a no-op.
        'SpeechRecognition==3.10.4; python_version == "3.8"',
        'SpeechRecognition==3.14.3; python_version >= "3.9"',
        "tenacity==9.1.2",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Testing",
        "Topic :: Internet :: WWW/HTTP :: Browsers",
        "Framework :: AsyncIO",
    ],
)
40 |
--------------------------------------------------------------------------------
/tests/test_async_recaptchav2.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from playwright.async_api import async_playwright
3 |
4 | from playwright_recaptcha import (
5 | CapSolverError,
6 | RecaptchaNotFoundError,
7 | RecaptchaRateLimitError,
8 | recaptchav2,
9 | )
10 |
11 |
@pytest.mark.asyncio
@pytest.mark.xfail(raises=RecaptchaRateLimitError)
async def test_solver_with_normal_recaptcha() -> None:
    """Solve the regular reCAPTCHA on the demo page with the async solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()
        await tab.goto(demo_url)

        # A rate limit raised while solving is tolerated by the xfail marker.
        async with recaptchav2.AsyncSolver(tab) as captcha_solver:
            await captcha_solver.solve_recaptcha(wait=True)
23 |
24 |
@pytest.mark.asyncio
@pytest.mark.xfail(raises=(RecaptchaNotFoundError, RecaptchaRateLimitError))
async def test_solver_with_hidden_recaptcha() -> None:
    """Solve the invisible reCAPTCHA variant of the demo page with the async solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo?invisible=true"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()
        await tab.goto(demo_url)

        # Click the page's button before handing the page to the solver.
        submit_button = tab.get_by_role("button")
        await submit_button.click()

        async with recaptchav2.AsyncSolver(tab) as captcha_solver:
            await captcha_solver.solve_recaptcha(wait=True)
38 |
39 |
@pytest.mark.asyncio
@pytest.mark.xfail(raises=RecaptchaRateLimitError)
async def test_solver_with_slow_browser() -> None:
    """Solve the demo reCAPTCHA while the browser is launched with ``slow_mo=1000``."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    async with async_playwright() as pw:
        slow_firefox = await pw.firefox.launch(slow_mo=1000)
        tab = await slow_firefox.new_page()
        await tab.goto(demo_url)

        async with recaptchav2.AsyncSolver(tab) as captcha_solver:
            await captcha_solver.solve_recaptcha(wait=True)
51 |
52 |
@pytest.mark.asyncio
@pytest.mark.xfail(raises=CapSolverError)
async def test_solver_with_image_challenge() -> None:
    """Solve the demo reCAPTCHA through the image challenge with the async solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()
        await tab.goto(demo_url)

        # A CapSolverError raised while solving is tolerated by the xfail marker.
        async with recaptchav2.AsyncSolver(tab) as captcha_solver:
            await captcha_solver.solve_recaptcha(wait=True, image_challenge=True)
64 |
65 |
@pytest.mark.asyncio
async def test_recaptcha_not_found_error() -> None:
    """Expect RecaptchaNotFoundError on a page that has no reCAPTCHA."""
    plain_url = "https://www.google.com/"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()
        await tab.goto(plain_url)

        # Both creating the solver and solving happen inside the raises block.
        with pytest.raises(RecaptchaNotFoundError):
            async with recaptchav2.AsyncSolver(tab) as captcha_solver:
                await captcha_solver.solve_recaptcha()
77 |
--------------------------------------------------------------------------------
/tests/test_async_recaptchav3.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from playwright.async_api import async_playwright
3 |
4 | from playwright_recaptcha import RecaptchaTimeoutError, recaptchav3
5 |
6 |
@pytest.mark.asyncio
async def test_solver_with_normal_browser() -> None:
    """Obtain a reCAPTCHA v3 token with a default browser and the async solver."""
    target_url = "https://antcpt.com/score_detector/"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()

        # The solver is created before navigating to the page.
        async with recaptchav3.AsyncSolver(tab) as captcha_solver:
            await tab.goto(target_url)
            await captcha_solver.solve_recaptcha()
17 |
18 |
@pytest.mark.asyncio
async def test_solver_with_slow_browser() -> None:
    """Obtain a reCAPTCHA v3 token while the browser is launched with ``slow_mo=1000``."""
    target_url = "https://antcpt.com/score_detector/"

    async with async_playwright() as pw:
        slow_firefox = await pw.firefox.launch(slow_mo=1000)
        tab = await slow_firefox.new_page()

        # The solver is created before navigating to the page.
        async with recaptchav3.AsyncSolver(tab) as captcha_solver:
            await tab.goto(target_url)
            await captcha_solver.solve_recaptcha()
29 |
30 |
@pytest.mark.asyncio
async def test_recaptcha_not_found_error() -> None:
    """Expect RecaptchaTimeoutError on a page that has no reCAPTCHA."""
    plain_url = "https://www.google.com/"

    async with async_playwright() as pw:
        firefox = await pw.firefox.launch()
        tab = await firefox.new_page()

        # A 10-second solver timeout keeps the expected failure short.
        with pytest.raises(RecaptchaTimeoutError):
            async with recaptchav3.AsyncSolver(tab, timeout=10) as captcha_solver:
                await tab.goto(plain_url)
                await captcha_solver.solve_recaptcha()
42 |
--------------------------------------------------------------------------------
/tests/test_sync_recaptchav2.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from playwright.sync_api import sync_playwright
3 |
4 | from playwright_recaptcha import (
5 | CapSolverError,
6 | RecaptchaNotFoundError,
7 | RecaptchaRateLimitError,
8 | recaptchav2,
9 | )
10 |
11 |
@pytest.mark.xfail(raises=RecaptchaRateLimitError)
def test_solver_with_normal_recaptcha() -> None:
    """Solve the regular reCAPTCHA on the demo page with the sync solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()
        tab.goto(demo_url)

        # A rate limit raised while solving is tolerated by the xfail marker.
        with recaptchav2.SyncSolver(tab) as captcha_solver:
            captcha_solver.solve_recaptcha(wait=True)
22 |
23 |
@pytest.mark.xfail(raises=(RecaptchaNotFoundError, RecaptchaRateLimitError))
def test_solver_with_hidden_recaptcha() -> None:
    """Solve the invisible reCAPTCHA variant of the demo page with the sync solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo?invisible=true"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()
        tab.goto(demo_url)

        # Click the page's button before handing the page to the solver.
        submit_button = tab.get_by_role("button")
        submit_button.click()

        with recaptchav2.SyncSolver(tab) as captcha_solver:
            captcha_solver.solve_recaptcha(wait=True)
36 |
37 |
@pytest.mark.xfail(raises=RecaptchaRateLimitError)
def test_solver_with_slow_browser() -> None:
    """Solve the demo reCAPTCHA while the browser is launched with ``slow_mo=1000``."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    with sync_playwright() as pw:
        slow_firefox = pw.firefox.launch(slow_mo=1000)
        tab = slow_firefox.new_page()
        tab.goto(demo_url)

        with recaptchav2.SyncSolver(tab) as captcha_solver:
            captcha_solver.solve_recaptcha(wait=True)
48 |
49 |
@pytest.mark.xfail(raises=CapSolverError)
def test_solver_with_image_challenge() -> None:
    """Solve the demo reCAPTCHA through the image challenge with the sync solver."""
    demo_url = "https://www.google.com/recaptcha/api2/demo"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()
        tab.goto(demo_url)

        # A CapSolverError raised while solving is tolerated by the xfail marker.
        with recaptchav2.SyncSolver(tab) as captcha_solver:
            captcha_solver.solve_recaptcha(wait=True, image_challenge=True)
60 |
61 |
def test_recaptcha_not_found_error() -> None:
    """Expect RecaptchaNotFoundError on a page that has no reCAPTCHA."""
    plain_url = "https://www.google.com/"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()
        tab.goto(plain_url)

        # Both creating the solver and solving happen inside the raises block.
        with pytest.raises(RecaptchaNotFoundError):
            with recaptchav2.SyncSolver(tab) as captcha_solver:
                captcha_solver.solve_recaptcha()
73 |
--------------------------------------------------------------------------------
/tests/test_sync_recaptchav3.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from playwright.sync_api import sync_playwright
3 |
4 | from playwright_recaptcha import RecaptchaTimeoutError, recaptchav3
5 |
6 |
def test_solver_with_normal_browser() -> None:
    """Obtain a reCAPTCHA v3 token with a default browser and the sync solver."""
    target_url = "https://antcpt.com/score_detector/"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()

        # The solver is created before navigating to the page.
        with recaptchav3.SyncSolver(tab) as captcha_solver:
            tab.goto(target_url)
            captcha_solver.solve_recaptcha()
16 |
17 |
def test_solver_with_slow_browser() -> None:
    """Obtain a reCAPTCHA v3 token while the browser is launched with ``slow_mo=1000``."""
    target_url = "https://antcpt.com/score_detector/"

    with sync_playwright() as pw:
        slow_firefox = pw.firefox.launch(slow_mo=1000)
        tab = slow_firefox.new_page()

        # The solver is created before navigating to the page.
        with recaptchav3.SyncSolver(tab) as captcha_solver:
            tab.goto(target_url)
            captcha_solver.solve_recaptcha()
27 |
28 |
def test_recaptcha_not_found_error() -> None:
    """Expect RecaptchaTimeoutError on a page that has no reCAPTCHA."""
    plain_url = "https://www.google.com/"

    with sync_playwright() as pw:
        firefox = pw.firefox.launch()
        tab = firefox.new_page()

        # A 10-second solver timeout keeps the expected failure short.
        with pytest.raises(RecaptchaTimeoutError):
            with recaptchav3.SyncSolver(tab, timeout=10) as captcha_solver:
                tab.goto(plain_url)
                captcha_solver.solve_recaptcha()
40 |
--------------------------------------------------------------------------------