├── .deepsource.toml ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── ruff.yml ├── .gitignore ├── LICENSE ├── README.md ├── main.py └── requirements.txt /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | [[analyzers]] 4 | name = "python" 5 | 6 | [analyzers.meta] 7 | runtime_version = "3.x.x" 8 | 9 | [[transformers]] 10 | name = "black" -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: "pip" 5 | target-branch: "zendriver" 6 | directory: "/" 7 | schedule: 8 | interval: "daily" 9 | time: "03:00" 10 | timezone: "America/Chicago" 11 | 12 | - package-ecosystem: "pip" 13 | target-branch: "playwright" 14 | directory: "/" 15 | schedule: 16 | interval: "daily" 17 | time: "03:00" 18 | timezone: "America/Chicago" 19 | -------------------------------------------------------------------------------- /.github/workflows/ruff.yml: -------------------------------------------------------------------------------- 1 | name: Analyze With Ruff 2 | 3 | on: 4 | push: 5 | branches: 6 | - zendriver 7 | pull_request: 8 | 9 | permissions: 10 | actions: read 11 | contents: read 12 | security-events: write 13 | 14 | jobs: 15 | analyze-with-ruff: 16 | name: Analyze With Ruff 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.x 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install -U pip 28 | pip install ruff 29 | 30 | - name: Run ruff 31 | 
run: ruff check 32 | --no-cache 33 | --exit-zero 34 | --output-format sarif > ruff-results.sarif 35 | 36 | - name: Upload ruff results to GitHub 37 | uses: github/codeql-action/upload-sarif@v2 38 | with: 39 | sarif_file: ruff-results.sarif 40 | wait-for-processing: true 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ruff 2 | .ruff_cache/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/ 164 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xewdy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CF-Clearance-Scraper 2 | 3 | ## Zendriver Version 4 | A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites issuing Cloudflare challenges to visitors. This program works on all Cloudflare challenge types (JavaScript, managed, and interactive). If you would prefer using Playwright, you can check out the [Playwright version](https://github.com/Xewdy444/CF-Clearance-Scraper/tree/playwright). 
5 | 6 | 7 | ## Clearance Cookie Usage 8 | In order to bypass Cloudflare challenges with the clearance cookies, you must make sure of two things: 9 | 10 | - The user agent used to fetch the clearance cookie must match the user agent being used within the requests that use the clearance cookie 11 | - The IP address used to fetch the clearance cookie must match the IP address being used to make the requests that use the clearance cookie 12 | 13 | ```mermaid 14 | flowchart 15 | N14e["cf_clearance"] 16 | N14f["IP Address"] 17 | N150["User Agent"] 18 | N14e --> N14f 19 | N14e --> N150 20 | ``` 21 | 22 | ## Installation 23 | Download and install [Google Chrome](https://www.google.com/chrome/index.html). 24 | 25 | Then, install the Python dependencies: 26 | 27 | $ pip install -r requirements.txt 28 | 29 | ## Usage 30 | > [!WARNING] 31 | > Depending on the user agent used, it may affect your ability to solve the Cloudflare challenge. 32 | 33 | ``` 34 | usage: main.py [-h] [-f FILE] [-t TIMEOUT] [-p PROXY] [-ua USER_AGENT] [--disable-http2] [--disable-http3] [--headed] [-ac] [-c] [-w] [-a] URL 35 | 36 | A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites issuing Cloudflare challenges to visitors 37 | 38 | positional arguments: 39 | URL The URL to scrape the Cloudflare clearance cookie from 40 | 41 | options: 42 | -h, --help show this help message and exit 43 | -f FILE, --file FILE The file to write the Cloudflare clearance cookie information to, in JSON format 44 | -t TIMEOUT, --timeout TIMEOUT 45 | The timeout in seconds to use for solving challenges 46 | -p PROXY, --proxy PROXY 47 | The proxy server URL to use for the browser requests 48 | -ua USER_AGENT, --user-agent USER_AGENT 49 | The user agent to use for the browser requests 50 | --disable-http2 Disable the usage of HTTP/2 for the browser requests 51 | --disable-http3 Disable the usage of HTTP/3 for the browser requests 52 | --headed Run the browser in headed mode 53 | -ac, 
--all-cookies Retrieve all cookies from the page, not just the Cloudflare clearance cookie 54 | -c, --curl Get the cURL command for the request with the cookies and user agent 55 | -w, --wget Get the Wget command for the request with the cookies and user agent 56 | -a, --aria2 Get the aria2 command for the request with the cookies and user agent 57 | ``` 58 | 59 | ## Example 60 | $ python main.py -f cookies.json https://sergiodemo.com/security/challenge/legacy-challenge 61 | [14:24:27] [INFO] Launching headless browser... 62 | [14:24:27] [INFO] Going to https://sergiodemo.com/security/challenge/legacy-challenge... 63 | [14:24:28] [INFO] Solving Cloudflare challenge [Interactive]... 64 | [14:24:31] [INFO] Cookie: cf_clearance=SkyEdEGvKp1BBA2NpRW3Azsw5neMD6sEEqJd6jOCCfs-1736886257-1.2.1.1-cam47ywp3q_yKE1bw0lZ2YS83dnh_BsIHtS7earbsYE.AxQDBtZiifiHvp1nZGRhABaSdjU7XRQpUCVwUSrlJGH8DXr50YR18pNLxBvcEJFO2gPMxr.ZjKze8rWgM9H4rPeET67jzAo_ZRpNP6hGCvdyO62VVCtqDBQDKhKZz9yZQp7YEHK7tchQIteVgu.dUxYdan5_D.R0zewnS382BP0w1AoTf2p40.lQwbhgildEiKG14xACd13V4EEthkZV0dnliwcn35rT3h32ODf50MABQNSQ8WjhZhbLSNOPO_zEhrK9R0Yn4eBuRKvnL9_x9jKvaBPDPAgyiZv_VzFP_g 65 | [14:24:31] [INFO] User agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 66 | [14:24:31] [INFO] Writing Cloudflare clearance cookie information to cookies.json... 
from __future__ import annotations

import argparse
import asyncio
import json
import logging
import random
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Dict, Final, Iterable, List, Optional

import latest_user_agents
import user_agents
import zendriver
from selenium_authenticated_proxy import SeleniumAuthenticatedProxy
from zendriver import cdp
from zendriver.cdp.emulation import UserAgentBrandVersion, UserAgentMetadata
from zendriver.cdp.network import T_JSON_DICT, Cookie
from zendriver.core.element import Element

# Template for the cURL/Wget/aria2 commands logged for the user.
COMMAND: Final[str] = (
    '{name}: {binary} --header "Cookie: {cookies}" --header "User-Agent: {user_agent}" {url}'
)


def get_chrome_user_agent() -> str:
    """
    Get a random up-to-date Chrome user agent string.

    Returns
    -------
    str
        The user agent string.
    """
    # Exclude Edge user agents, which also contain the "Chrome" token.
    chrome_user_agents = [
        user_agent
        for user_agent in latest_user_agents.get_latest_user_agents()
        if "Chrome" in user_agent and "Edg" not in user_agent
    ]

    return random.choice(chrome_user_agents)


class ChallengePlatform(Enum):
    """Cloudflare challenge platform types, keyed by the cType marker in the page."""

    JAVASCRIPT = "non-interactive"
    MANAGED = "managed"
    INTERACTIVE = "interactive"


class CloudflareSolver:
    """
    A class for solving Cloudflare challenges with Zendriver.

    Parameters
    ----------
    user_agent : Optional[str]
        The user agent string to use for the browser requests.
    timeout : float
        The timeout in seconds to use for browser actions and solving challenges.
    http2 : bool
        Enable or disable the usage of HTTP/2 for the browser requests.
    http3 : bool
        Enable or disable the usage of HTTP/3 for the browser requests.
    headless : bool
        Enable or disable headless mode for the browser (not supported on Windows).
    proxy : Optional[str]
        The proxy server URL to use for the browser requests.
    """

    def __init__(
        self,
        *,
        user_agent: Optional[str],
        timeout: float,
        http2: bool,
        http3: bool,
        headless: bool,
        proxy: Optional[str],
    ) -> None:
        config = zendriver.Config(headless=headless)

        if user_agent is not None:
            config.add_argument(f"--user-agent={user_agent}")

        if not http2:
            config.add_argument("--disable-http2")

        if not http3:
            # QUIC is the transport underlying HTTP/3.
            config.add_argument("--disable-quic")

        # Chrome has no flag for authenticated proxies; this helper injects
        # the proxy credentials into the browser configuration.
        auth_proxy = SeleniumAuthenticatedProxy(proxy)
        auth_proxy.enrich_chrome_options(config)

        self.driver = zendriver.Browser(config)
        self._timeout = timeout

    async def __aenter__(self) -> CloudflareSolver:
        await self.driver.start()
        return self

    async def __aexit__(self, *_: Any) -> None:
        await self.driver.stop()

    @staticmethod
    def _format_cookies(cookies: Iterable[Cookie]) -> List[T_JSON_DICT]:
        """
        Format cookies into a list of JSON cookies.

        Parameters
        ----------
        cookies : Iterable[Cookie]
            List of cookies.

        Returns
        -------
        List[T_JSON_DICT]
            List of JSON cookies.
        """
        return [cookie.to_json() for cookie in cookies]

    @staticmethod
    def extract_clearance_cookie(
        cookies: Iterable[T_JSON_DICT],
    ) -> Optional[T_JSON_DICT]:
        """
        Extract the Cloudflare clearance cookie from a list of cookies.

        Parameters
        ----------
        cookies : Iterable[T_JSON_DICT]
            List of cookies.

        Returns
        -------
        Optional[T_JSON_DICT]
            The Cloudflare clearance cookie. Returns None if the cookie is not found.
        """
        for cookie in cookies:
            if cookie["name"] == "cf_clearance":
                return cookie

        return None

    async def get_user_agent(self) -> str:
        """
        Get the current user agent string.

        Returns
        -------
        str
            The user agent string.
        """
        return await self.driver.main_tab.evaluate("navigator.userAgent")

    async def get_cookies(self) -> List[T_JSON_DICT]:
        """
        Get all cookies from the current page.

        Returns
        -------
        List[T_JSON_DICT]
            List of cookies.
        """
        return self._format_cookies(await self.driver.cookies.get_all())

    async def set_user_agent_metadata(self, user_agent: str) -> None:
        """
        Set the user agent metadata for the browser.

        This keeps the client-hint metadata (Sec-CH-UA headers) consistent
        with the user agent string, which Cloudflare cross-checks.

        Parameters
        ----------
        user_agent : str
            The user agent string to parse information from.
        """
        device = user_agents.parse(user_agent)

        self.driver.main_tab.feed_cdp(
            cdp.network.set_user_agent_override(
                user_agent=user_agent,
                platform=device.os.family,
                user_agent_metadata=UserAgentMetadata(
                    platform=device.os.family,
                    platform_version=device.os.version_string,
                    architecture="x86_64",
                    model=device.device.model or "",
                    mobile=device.is_mobile,
                    brands=[
                        UserAgentBrandVersion(
                            brand="Google Chrome",
                            version=str(device.browser.version[0]),
                        ),
                        UserAgentBrandVersion(brand="Not-A.Brand", version="8"),
                        UserAgentBrandVersion(
                            brand="Chromium", version=str(device.browser.version[0])
                        ),
                    ],
                ),
            )
        )

    async def detect_challenge(self) -> Optional[ChallengePlatform]:
        """
        Detect the Cloudflare challenge platform on the current page.

        Returns
        -------
        Optional[ChallengePlatform]
            The Cloudflare challenge platform. Returns None if no challenge
            marker is present in the page HTML.
        """
        html = await self.driver.main_tab.get_content()

        for platform in ChallengePlatform:
            if f"cType: '{platform.value}'" in html:
                return platform

        return None

    async def solve_challenge(self) -> None:
        """Solve the Cloudflare challenge on the current page.

        Polls until a clearance cookie appears, the challenge disappears, or
        ``self._timeout`` seconds have elapsed, clicking the challenge widget
        when it becomes visible.
        """
        start_timestamp = datetime.now()

        while (
            self.extract_clearance_cookie(await self.get_cookies()) is None
            and await self.detect_challenge() is not None
            # total_seconds(), not .seconds: .seconds is only the sub-day
            # component of the timedelta, so timeouts of a day or more would
            # never trigger.
            and (datetime.now() - start_timestamp).total_seconds() < self._timeout
        ):
            widget_input = await self.driver.main_tab.find("input")

            if widget_input.parent is None or not widget_input.parent.shadow_roots:
                await asyncio.sleep(0.25)
                continue

            # The challenge widget lives inside a shadow root; wrap the shadow
            # root node so it can be traversed like a regular element.
            challenge = Element(
                widget_input.parent.shadow_roots[0],
                self.driver.main_tab,
                widget_input.parent.tree,
            )

            challenge = challenge.children[0]

            if (
                isinstance(challenge, Element)
                # .get() instead of indexing: the widget may have no inline
                # style attribute at all, which previously raised KeyError.
                and "display: none;" not in challenge.attrs.get("style", "")
            ):
                await asyncio.sleep(1)

                try:
                    await challenge.get_position()
                except Exception:
                    # The element can detach mid-interaction; retry the loop.
                    continue

                await challenge.mouse_click()
"-t", 280 | "--timeout", 281 | default=30, 282 | help="The timeout in seconds to use for solving challenges", 283 | type=float, 284 | ) 285 | 286 | parser.add_argument( 287 | "-p", 288 | "--proxy", 289 | default=None, 290 | help="The proxy server URL to use for the browser requests", 291 | type=str, 292 | ) 293 | 294 | parser.add_argument( 295 | "-ua", 296 | "--user-agent", 297 | default=None, 298 | help="The user agent to use for the browser requests", 299 | type=str, 300 | ) 301 | 302 | parser.add_argument( 303 | "--disable-http2", 304 | action="store_true", 305 | help="Disable the usage of HTTP/2 for the browser requests", 306 | ) 307 | 308 | parser.add_argument( 309 | "--disable-http3", 310 | action="store_true", 311 | help="Disable the usage of HTTP/3 for the browser requests", 312 | ) 313 | 314 | parser.add_argument( 315 | "--headed", 316 | action="store_true", 317 | help="Run the browser in headed mode", 318 | ) 319 | 320 | parser.add_argument( 321 | "-ac", 322 | "--all-cookies", 323 | action="store_true", 324 | help="Retrieve all cookies from the page, not just the Cloudflare clearance cookie", 325 | ) 326 | 327 | parser.add_argument( 328 | "-c", 329 | "--curl", 330 | action="store_true", 331 | help="Get the cURL command for the request with the cookies and user agent", 332 | ) 333 | 334 | parser.add_argument( 335 | "-w", 336 | "--wget", 337 | action="store_true", 338 | help="Get the Wget command for the request with the cookies and user agent", 339 | ) 340 | 341 | parser.add_argument( 342 | "-a", 343 | "--aria2", 344 | action="store_true", 345 | help="Get the aria2 command for the request with the cookies and user agent", 346 | ) 347 | 348 | args = parser.parse_args() 349 | 350 | logging.basicConfig( 351 | format="[%(asctime)s] [%(levelname)s] %(message)s", 352 | datefmt="%H:%M:%S", 353 | level=logging.INFO, 354 | ) 355 | 356 | logging.getLogger("zendriver").setLevel(logging.WARNING) 357 | logging.info("Launching %s browser...", "headed" if args.headed 
else "headless") 358 | 359 | challenge_messages = { 360 | ChallengePlatform.JAVASCRIPT: "Solving Cloudflare challenge [JavaScript]...", 361 | ChallengePlatform.MANAGED: "Solving Cloudflare challenge [Managed]...", 362 | ChallengePlatform.INTERACTIVE: "Solving Cloudflare challenge [Interactive]...", 363 | } 364 | 365 | user_agent = get_chrome_user_agent() if args.user_agent is None else args.user_agent 366 | 367 | async with CloudflareSolver( 368 | user_agent=user_agent, 369 | timeout=args.timeout, 370 | http2=not args.disable_http2, 371 | http3=not args.disable_http3, 372 | headless=not args.headed, 373 | proxy=args.proxy, 374 | ) as solver: 375 | logging.info("Going to %s...", args.url) 376 | 377 | try: 378 | await solver.driver.get(args.url) 379 | except asyncio.TimeoutError as err: 380 | logging.error(err) 381 | return 382 | 383 | all_cookies = await solver.get_cookies() 384 | clearance_cookie = solver.extract_clearance_cookie(all_cookies) 385 | 386 | if clearance_cookie is None: 387 | await solver.set_user_agent_metadata(await solver.get_user_agent()) 388 | challenge_platform = await solver.detect_challenge() 389 | 390 | if challenge_platform is None: 391 | logging.error("No Cloudflare challenge detected.") 392 | return 393 | 394 | logging.info(challenge_messages[challenge_platform]) 395 | 396 | try: 397 | await solver.solve_challenge() 398 | except asyncio.TimeoutError: 399 | pass 400 | 401 | all_cookies = await solver.get_cookies() 402 | clearance_cookie = solver.extract_clearance_cookie(all_cookies) 403 | 404 | user_agent = await solver.get_user_agent() 405 | 406 | if clearance_cookie is None: 407 | logging.error("Failed to retrieve a Cloudflare clearance cookie.") 408 | return 409 | 410 | cookie_string = "; ".join( 411 | f'{cookie["name"]}={cookie["value"]}' for cookie in all_cookies 412 | ) 413 | 414 | if args.all_cookies: 415 | logging.info("All cookies: %s", cookie_string) 416 | else: 417 | logging.info("Cookie: cf_clearance=%s", 
clearance_cookie["value"]) 418 | 419 | logging.info("User agent: %s", user_agent) 420 | 421 | if args.curl: 422 | logging.info( 423 | COMMAND.format( 424 | name="cURL", 425 | binary="curl", 426 | cookies=( 427 | cookie_string 428 | if args.all_cookies 429 | else f'cf_clearance={clearance_cookie["value"]}' 430 | ), 431 | user_agent=user_agent, 432 | url=( 433 | f"--proxy {args.proxy} {args.url}" 434 | if args.proxy is not None 435 | else args.url 436 | ), 437 | ) 438 | ) 439 | 440 | if args.wget: 441 | if args.proxy is not None: 442 | logging.warning( 443 | "Proxies must be set in an environment variable or config file for Wget." 444 | ) 445 | 446 | logging.info( 447 | COMMAND.format( 448 | name="Wget", 449 | binary="wget", 450 | cookies=( 451 | cookie_string 452 | if args.all_cookies 453 | else f'cf_clearance={clearance_cookie["value"]}' 454 | ), 455 | user_agent=user_agent, 456 | url=args.url, 457 | ) 458 | ) 459 | 460 | if args.aria2: 461 | if args.proxy is not None and args.proxy.casefold().startswith("socks"): 462 | logging.warning("SOCKS proxies are not supported by aria2.") 463 | 464 | logging.info( 465 | COMMAND.format( 466 | name="aria2", 467 | binary="aria2c", 468 | cookies=( 469 | cookie_string 470 | if args.all_cookies 471 | else f'cf_clearance={clearance_cookie["value"]}' 472 | ), 473 | user_agent=user_agent, 474 | url=( 475 | f"--all-proxy {args.proxy} {args.url}" 476 | if args.proxy is not None 477 | else args.url 478 | ), 479 | ) 480 | ) 481 | 482 | if args.file is None: 483 | return 484 | 485 | logging.info("Writing Cloudflare clearance cookie information to %s...", args.file) 486 | 487 | try: 488 | with open(args.file, encoding="utf-8") as file: 489 | json_data = json.load(file) 490 | except (FileNotFoundError, json.JSONDecodeError): 491 | json_data: Dict[str, List[Dict[str, Any]]] = {} 492 | 493 | local_timezone = datetime.now(timezone.utc).astimezone().tzinfo 494 | unix_timestamp = clearance_cookie["expires"] - timedelta(days=365).total_seconds() 
495 | timestamp = datetime.fromtimestamp(unix_timestamp, tz=local_timezone).isoformat() 496 | 497 | json_data.setdefault(clearance_cookie["domain"], []).append( 498 | { 499 | "unix_timestamp": int(unix_timestamp), 500 | "timestamp": timestamp, 501 | "cf_clearance": clearance_cookie["value"], 502 | "cookies": all_cookies, 503 | "user_agent": user_agent, 504 | "proxy": args.proxy, 505 | } 506 | ) 507 | 508 | with open(args.file, "w", encoding="utf-8") as file: 509 | json.dump(json_data, file, indent=4) 510 | 511 | 512 | if __name__ == "__main__": 513 | asyncio.run(main()) 514 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | latest_user_agents==0.0.5 2 | selenium_authenticated_proxy==1.1.2 3 | user_agents==2.2.0 4 | zendriver==0.5.2 5 | --------------------------------------------------------------------------------