├── .deepsource.toml
├── .gitattributes
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── ruff.yml
├── .gitignore
├── LICENSE
├── README.md
├── main.py
└── requirements.txt


/.deepsource.toml:
--------------------------------------------------------------------------------
 1 | version = 1
 2 | 
 3 | [[analyzers]]
 4 | name = "python"
 5 | 
 6 |   [analyzers.meta]
 7 |   runtime_version = "3.x.x"
 8 | 
 9 | [[transformers]]
10 | name = "black"


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | updates:
 4 |   - package-ecosystem: "pip"
 5 |     target-branch: "zendriver"
 6 |     directory: "/"
 7 |     schedule:
 8 |       interval: "daily"
 9 |       time: "03:00"
10 |       timezone: "America/Chicago"
11 | 
12 |   - package-ecosystem: "pip"
13 |     target-branch: "playwright"
14 |     directory: "/"
15 |     schedule:
16 |       interval: "daily"
17 |       time: "03:00"
18 |       timezone: "America/Chicago"
19 | 


--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
 1 | name: Analyze With Ruff
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - zendriver
 7 |   pull_request:
 8 | 
 9 | permissions:
10 |   actions: read
11 |   contents: read
12 |   security-events: write
13 | 
14 | jobs:
15 |   analyze-with-ruff:
16 |     name: Analyze With Ruff
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: actions/setup-python@v5
22 |         with:
23 |           python-version: 3.x
24 | 
25 |       - name: Install dependencies
26 |         run: |
27 |           python -m pip install -U pip
28 |           pip install ruff
29 | 
30 |       - name: Run ruff
31 |         run: ruff check
32 |           --no-cache
33 |           --exit-zero
34 |           --output-format sarif > ruff-results.sarif
35 | 
36 |       - name: Upload ruff results to GitHub
37 |         uses: github/codeql-action/upload-sarif@v2
38 |         with:
39 |           sarif_file: ruff-results.sarif
40 |           wait-for-processing: true
41 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Ruff
  2 | .ruff_cache/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | cover/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | .pybuilder/
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | #   For a library or package, you might want to ignore these files since the code is
 90 | #   intended to run in multiple environments; otherwise, check them in:
 91 | # .python-version
 92 | 
 93 | # pipenv
 94 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 95 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 96 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 97 | #   install all needed dependencies.
 98 | #Pipfile.lock
 99 | 
100 | # poetry
101 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
103 | #   commonly ignored for libraries.
104 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 | 
107 | # pdm
108 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | #   in version control.
112 | #   https://pdm.fming.dev/#use-with-ide
113 | .pdm.toml
114 | 
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116 | __pypackages__/
117 | 
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
152 | # pytype static type analyzer
153 | .pytype/
154 | 
155 | # Cython debug symbols
156 | cython_debug/
157 | 
158 | # PyCharm
159 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
162 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
163 | #.idea/
164 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Xewdy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CF-Clearance-Scraper
 2 | 
 3 | ## Zendriver Version
 4 | A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites issuing Cloudflare challenges to visitors. This program works on all Cloudflare challenge types (JavaScript, managed, and interactive). If you would prefer using Playwright, you can check out the [Playwright version](https://github.com/Xewdy444/CF-Clearance-Scraper/tree/playwright).
 5 | 
 6 | 
 7 | ## Clearance Cookie Usage
 8 | In order to bypass Cloudflare challenges with the clearance cookies, you must make sure of two things:
 9 | 
10 | - The user agent used to fetch the clearance cookie must match the user agent being used within the requests that use the clearance cookie
11 | - The IP address used to fetch the clearance cookie must match the IP address being used to make the requests that use the clearance cookie
12 | 
13 | ```mermaid
14 | flowchart
15 | 	N14e["cf_clearance"]
16 | 	N14f["IP Address"]
17 | 	N150["User Agent"]
18 | 	N14e --> N14f
19 | 	N14e --> N150
20 | ```
21 | 
22 | ## Installation
23 | Download and install [Google Chrome](https://www.google.com/chrome/index.html).
24 | 
25 | Then, install the Python dependencies:
26 | 
27 |     $ pip install -r requirements.txt
28 | 
29 | ## Usage
30 | > [!WARNING]
31 | > Depending on the user agent used, it may affect your ability to solve the Cloudflare challenge.
32 | 
33 | ```
34 | usage: main.py [-h] [-f FILE] [-t TIMEOUT] [-p PROXY] [-ua USER_AGENT] [--disable-http2] [--disable-http3] [--headed] [-ac] [-c] [-w] [-a] URL
35 | 
36 | A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites issuing Cloudflare challenges to visitors
37 | 
38 | positional arguments:
39 |   URL                   The URL to scrape the Cloudflare clearance cookie from
40 | 
41 | options:
42 |   -h, --help            show this help message and exit
43 |   -f FILE, --file FILE  The file to write the Cloudflare clearance cookie information to, in JSON format
44 |   -t TIMEOUT, --timeout TIMEOUT
45 |                         The timeout in seconds to use for solving challenges
46 |   -p PROXY, --proxy PROXY
47 |                         The proxy server URL to use for the browser requests
48 |   -ua USER_AGENT, --user-agent USER_AGENT
49 |                         The user agent to use for the browser requests
50 |   --disable-http2       Disable the usage of HTTP/2 for the browser requests
51 |   --disable-http3       Disable the usage of HTTP/3 for the browser requests
52 |   --headed              Run the browser in headed mode
53 |   -ac, --all-cookies    Retrieve all cookies from the page, not just the Cloudflare clearance cookie
54 |   -c, --curl            Get the cURL command for the request with the cookies and user agent
55 |   -w, --wget            Get the Wget command for the request with the cookies and user agent
56 |   -a, --aria2           Get the aria2 command for the request with the cookies and user agent
57 | ```
58 | 
59 | ## Example
60 |     $ python main.py -f cookies.json https://sergiodemo.com/security/challenge/legacy-challenge
61 |     [14:24:27] [INFO] Launching headless browser...
62 |     [14:24:27] [INFO] Going to https://sergiodemo.com/security/challenge/legacy-challenge...
63 |     [14:24:28] [INFO] Solving Cloudflare challenge [Interactive]...
64 |     [14:24:31] [INFO] Cookie: cf_clearance=SkyEdEGvKp1BBA2NpRW3Azsw5neMD6sEEqJd6jOCCfs-1736886257-1.2.1.1-cam47ywp3q_yKE1bw0lZ2YS83dnh_BsIHtS7earbsYE.AxQDBtZiifiHvp1nZGRhABaSdjU7XRQpUCVwUSrlJGH8DXr50YR18pNLxBvcEJFO2gPMxr.ZjKze8rWgM9H4rPeET67jzAo_ZRpNP6hGCvdyO62VVCtqDBQDKhKZz9yZQp7YEHK7tchQIteVgu.dUxYdan5_D.R0zewnS382BP0w1AoTf2p40.lQwbhgildEiKG14xACd13V4EEthkZV0dnliwcn35rT3h32ODf50MABQNSQ8WjhZhbLSNOPO_zEhrK9R0Yn4eBuRKvnL9_x9jKvaBPDPAgyiZv_VzFP_g
65 |     [14:24:31] [INFO] User agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36
66 |     [14:24:31] [INFO] Writing Cloudflare clearance cookie information to cookies.json...
67 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import argparse
  4 | import asyncio
  5 | import json
  6 | import logging
  7 | import random
  8 | from datetime import datetime, timedelta, timezone
  9 | from enum import Enum
 10 | from typing import Any, Dict, Final, Iterable, List, Optional
 11 | 
 12 | import latest_user_agents
 13 | import user_agents
 14 | import zendriver
 15 | from selenium_authenticated_proxy import SeleniumAuthenticatedProxy
 16 | from zendriver import cdp
 17 | from zendriver.cdp.emulation import UserAgentBrandVersion, UserAgentMetadata
 18 | from zendriver.cdp.network import T_JSON_DICT, Cookie
 19 | from zendriver.core.element import Element
 20 | 
 21 | COMMAND: Final[str] = (
 22 |     '{name}: {binary} --header "Cookie: {cookies}" --header "User-Agent: {user_agent}" {url}'
 23 | )
 24 | 
 25 | 
 26 | def get_chrome_user_agent() -> str:
 27 |     """
 28 |     Get a random up-to-date Chrome user agent string.
 29 | 
 30 |     Returns
 31 |     -------
 32 |     str
 33 |         The user agent string.
 34 |     """
 35 |     chrome_user_agents = [
 36 |         user_agent
 37 |         for user_agent in latest_user_agents.get_latest_user_agents()
 38 |         if "Chrome" in user_agent and "Edg" not in user_agent
 39 |     ]
 40 | 
 41 |     return random.choice(chrome_user_agents)
 42 | 
 43 | 
 44 | class ChallengePlatform(Enum):
 45 |     """Cloudflare challenge platform types."""
 46 | 
 47 |     JAVASCRIPT = "non-interactive"
 48 |     MANAGED = "managed"
 49 |     INTERACTIVE = "interactive"
 50 | 
 51 | 
 52 | class CloudflareSolver:
 53 |     """
 54 |     A class for solving Cloudflare challenges with Zendriver.
 55 | 
 56 |     Parameters
 57 |     ----------
 58 |     user_agent : Optional[str]
 59 |         The user agent string to use for the browser requests.
 60 |     timeout : float
 61 |         The timeout in seconds to use for browser actions and solving challenges.
 62 |     http2 : bool
 63 |         Enable or disable the usage of HTTP/2 for the browser requests.
 64 |     http3 : bool
 65 |         Enable or disable the usage of HTTP/3 for the browser requests.
 66 |     headless : bool
 67 |         Enable or disable headless mode for the browser (not supported on Windows).
 68 |     proxy : Optional[str]
 69 |         The proxy server URL to use for the browser requests.
 70 |     """
 71 | 
 72 |     def __init__(
 73 |         self,
 74 |         *,
 75 |         user_agent: Optional[str],
 76 |         timeout: float,
 77 |         http2: bool,
 78 |         http3: bool,
 79 |         headless: bool,
 80 |         proxy: Optional[str],
 81 |     ) -> None:
 82 |         config = zendriver.Config(headless=headless)
 83 | 
 84 |         if user_agent is not None:
 85 |             config.add_argument(f"--user-agent={user_agent}")
 86 | 
 87 |         if not http2:
 88 |             config.add_argument("--disable-http2")
 89 | 
 90 |         if not http3:
 91 |             config.add_argument("--disable-quic")
 92 | 
 93 |         auth_proxy = SeleniumAuthenticatedProxy(proxy)
 94 |         auth_proxy.enrich_chrome_options(config)
 95 | 
 96 |         self.driver = zendriver.Browser(config)
 97 |         self._timeout = timeout
 98 | 
 99 |     async def __aenter__(self) -> CloudflareSolver:
100 |         await self.driver.start()
101 |         return self
102 | 
103 |     async def __aexit__(self, *_: Any) -> None:
104 |         await self.driver.stop()
105 | 
106 |     @staticmethod
107 |     def _format_cookies(cookies: Iterable[Cookie]) -> List[T_JSON_DICT]:
108 |         """
109 |         Format cookies into a list of JSON cookies.
110 | 
111 |         Parameters
112 |         ----------
113 |         cookies : Iterable[Cookie]
114 |             List of cookies.
115 | 
116 |         Returns
117 |         -------
118 |         List[T_JSON_DICT]
119 |             List of JSON cookies.
120 |         """
121 |         return [cookie.to_json() for cookie in cookies]
122 | 
123 |     @staticmethod
124 |     def extract_clearance_cookie(
125 |         cookies: Iterable[T_JSON_DICT],
126 |     ) -> Optional[T_JSON_DICT]:
127 |         """
128 |         Extract the Cloudflare clearance cookie from a list of cookies.
129 | 
130 |         Parameters
131 |         ----------
132 |         cookies : Iterable[T_JSON_DICT]
133 |             List of cookies.
134 | 
135 |         Returns
136 |         -------
137 |         Optional[T_JSON_DICT]
138 |             The Cloudflare clearance cookie. Returns None if the cookie is not found.
139 |         """
140 | 
141 |         for cookie in cookies:
142 |             if cookie["name"] == "cf_clearance":
143 |                 return cookie
144 | 
145 |         return None
146 | 
147 |     async def get_user_agent(self) -> str:
148 |         """
149 |         Get the current user agent string.
150 | 
151 |         Returns
152 |         -------
153 |         str
154 |             The user agent string.
155 |         """
156 |         return await self.driver.main_tab.evaluate("navigator.userAgent")
157 | 
158 |     async def get_cookies(self) -> List[T_JSON_DICT]:
159 |         """
160 |         Get all cookies from the current page.
161 | 
162 |         Returns
163 |         -------
164 |         List[T_JSON_DICT]
165 |             List of cookies.
166 |         """
167 |         return self._format_cookies(await self.driver.cookies.get_all())
168 | 
169 |     async def set_user_agent_metadata(self, user_agent: str) -> None:
170 |         """
171 |         Set the user agent metadata for the browser.
172 | 
173 |         Parameters
174 |         ----------
175 |         user_agent : str
176 |             The user agent string to parse information from.
177 |         """
178 |         device = user_agents.parse(user_agent)
179 | 
180 |         self.driver.main_tab.feed_cdp(
181 |             cdp.network.set_user_agent_override(
182 |                 user_agent=user_agent,
183 |                 platform=device.os.family,
184 |                 user_agent_metadata=UserAgentMetadata(
185 |                     platform=device.os.family,
186 |                     platform_version=device.os.version_string,
187 |                     architecture="x86_64",
188 |                     model=device.device.model or "",
189 |                     mobile=device.is_mobile,
190 |                     brands=[
191 |                         UserAgentBrandVersion(
192 |                             brand="Google Chrome",
193 |                             version=str(device.browser.version[0]),
194 |                         ),
195 |                         UserAgentBrandVersion(brand="Not-A.Brand", version="8"),
196 |                         UserAgentBrandVersion(
197 |                             brand="Chromium", version=str(device.browser.version[0])
198 |                         ),
199 |                     ],
200 |                 ),
201 |             )
202 |         )
203 | 
204 |     async def detect_challenge(self) -> Optional[ChallengePlatform]:
205 |         """
206 |         Detect the Cloudflare challenge platform on the current page.
207 | 
208 |         Returns
209 |         -------
210 |         Optional[ChallengePlatform]
211 |             The Cloudflare challenge platform.
212 |         """
213 |         html = await self.driver.main_tab.get_content()
214 | 
215 |         for platform in ChallengePlatform:
216 |             if f"cType: '{platform.value}'" in html:
217 |                 return platform
218 | 
219 |         return None
220 | 
221 |     async def solve_challenge(self) -> None:
222 |         """Solve the Cloudflare challenge on the current page."""
223 |         start_timestamp = datetime.now()
224 | 
225 |         while (
226 |             self.extract_clearance_cookie(await self.get_cookies()) is None
227 |             and await self.detect_challenge() is not None
228 |             and (datetime.now() - start_timestamp).seconds < self._timeout
229 |         ):
230 |             widget_input = await self.driver.main_tab.find("input")
231 | 
232 |             if widget_input.parent is None or not widget_input.parent.shadow_roots:
233 |                 await asyncio.sleep(0.25)
234 |                 continue
235 | 
236 |             challenge = Element(
237 |                 widget_input.parent.shadow_roots[0],
238 |                 self.driver.main_tab,
239 |                 widget_input.parent.tree,
240 |             )
241 | 
242 |             challenge = challenge.children[0]
243 | 
244 |             if (
245 |                 isinstance(challenge, Element)
246 |                 and "display: none;" not in challenge.attrs["style"]
247 |             ):
248 |                 await asyncio.sleep(1)
249 | 
250 |                 try:
251 |                     await challenge.get_position()
252 |                 except Exception:
253 |                     continue
254 | 
255 |                 await challenge.mouse_click()
256 | 
257 | 
258 | async def main() -> None:
259 |     parser = argparse.ArgumentParser(
260 |         description="A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites issuing Cloudflare challenges to visitors"
261 |     )
262 | 
263 |     parser.add_argument(
264 |         "url",
265 |         metavar="URL",
266 |         help="The URL to scrape the Cloudflare clearance cookie from",
267 |         type=str,
268 |     )
269 | 
270 |     parser.add_argument(
271 |         "-f",
272 |         "--file",
273 |         default=None,
274 |         help="The file to write the Cloudflare clearance cookie information to, in JSON format",
275 |         type=str,
276 |     )
277 | 
278 |     parser.add_argument(
279 |         "-t",
280 |         "--timeout",
281 |         default=30,
282 |         help="The timeout in seconds to use for solving challenges",
283 |         type=float,
284 |     )
285 | 
286 |     parser.add_argument(
287 |         "-p",
288 |         "--proxy",
289 |         default=None,
290 |         help="The proxy server URL to use for the browser requests",
291 |         type=str,
292 |     )
293 | 
294 |     parser.add_argument(
295 |         "-ua",
296 |         "--user-agent",
297 |         default=None,
298 |         help="The user agent to use for the browser requests",
299 |         type=str,
300 |     )
301 | 
302 |     parser.add_argument(
303 |         "--disable-http2",
304 |         action="store_true",
305 |         help="Disable the usage of HTTP/2 for the browser requests",
306 |     )
307 | 
308 |     parser.add_argument(
309 |         "--disable-http3",
310 |         action="store_true",
311 |         help="Disable the usage of HTTP/3 for the browser requests",
312 |     )
313 | 
314 |     parser.add_argument(
315 |         "--headed",
316 |         action="store_true",
317 |         help="Run the browser in headed mode",
318 |     )
319 | 
320 |     parser.add_argument(
321 |         "-ac",
322 |         "--all-cookies",
323 |         action="store_true",
324 |         help="Retrieve all cookies from the page, not just the Cloudflare clearance cookie",
325 |     )
326 | 
327 |     parser.add_argument(
328 |         "-c",
329 |         "--curl",
330 |         action="store_true",
331 |         help="Get the cURL command for the request with the cookies and user agent",
332 |     )
333 | 
334 |     parser.add_argument(
335 |         "-w",
336 |         "--wget",
337 |         action="store_true",
338 |         help="Get the Wget command for the request with the cookies and user agent",
339 |     )
340 | 
341 |     parser.add_argument(
342 |         "-a",
343 |         "--aria2",
344 |         action="store_true",
345 |         help="Get the aria2 command for the request with the cookies and user agent",
346 |     )
347 | 
348 |     args = parser.parse_args()
349 | 
350 |     logging.basicConfig(
351 |         format="[%(asctime)s] [%(levelname)s] %(message)s",
352 |         datefmt="%H:%M:%S",
353 |         level=logging.INFO,
354 |     )
355 | 
356 |     logging.getLogger("zendriver").setLevel(logging.WARNING)
357 |     logging.info("Launching %s browser...", "headed" if args.headed else "headless")
358 | 
359 |     challenge_messages = {
360 |         ChallengePlatform.JAVASCRIPT: "Solving Cloudflare challenge [JavaScript]...",
361 |         ChallengePlatform.MANAGED: "Solving Cloudflare challenge [Managed]...",
362 |         ChallengePlatform.INTERACTIVE: "Solving Cloudflare challenge [Interactive]...",
363 |     }
364 | 
365 |     user_agent = get_chrome_user_agent() if args.user_agent is None else args.user_agent
366 | 
367 |     async with CloudflareSolver(
368 |         user_agent=user_agent,
369 |         timeout=args.timeout,
370 |         http2=not args.disable_http2,
371 |         http3=not args.disable_http3,
372 |         headless=not args.headed,
373 |         proxy=args.proxy,
374 |     ) as solver:
375 |         logging.info("Going to %s...", args.url)
376 | 
377 |         try:
378 |             await solver.driver.get(args.url)
379 |         except asyncio.TimeoutError as err:
380 |             logging.error(err)
381 |             return
382 | 
383 |         all_cookies = await solver.get_cookies()
384 |         clearance_cookie = solver.extract_clearance_cookie(all_cookies)
385 | 
386 |         if clearance_cookie is None:
387 |             await solver.set_user_agent_metadata(await solver.get_user_agent())
388 |             challenge_platform = await solver.detect_challenge()
389 | 
390 |             if challenge_platform is None:
391 |                 logging.error("No Cloudflare challenge detected.")
392 |                 return
393 | 
394 |             logging.info(challenge_messages[challenge_platform])
395 | 
396 |             try:
397 |                 await solver.solve_challenge()
398 |             except asyncio.TimeoutError:
399 |                 pass
400 | 
401 |             all_cookies = await solver.get_cookies()
402 |             clearance_cookie = solver.extract_clearance_cookie(all_cookies)
403 | 
404 |         user_agent = await solver.get_user_agent()
405 | 
406 |     if clearance_cookie is None:
407 |         logging.error("Failed to retrieve a Cloudflare clearance cookie.")
408 |         return
409 | 
410 |     cookie_string = "; ".join(
411 |         f'{cookie["name"]}={cookie["value"]}' for cookie in all_cookies
412 |     )
413 | 
414 |     if args.all_cookies:
415 |         logging.info("All cookies: %s", cookie_string)
416 |     else:
417 |         logging.info("Cookie: cf_clearance=%s", clearance_cookie["value"])
418 | 
419 |     logging.info("User agent: %s", user_agent)
420 | 
421 |     if args.curl:
422 |         logging.info(
423 |             COMMAND.format(
424 |                 name="cURL",
425 |                 binary="curl",
426 |                 cookies=(
427 |                     cookie_string
428 |                     if args.all_cookies
429 |                     else f'cf_clearance={clearance_cookie["value"]}'
430 |                 ),
431 |                 user_agent=user_agent,
432 |                 url=(
433 |                     f"--proxy {args.proxy} {args.url}"
434 |                     if args.proxy is not None
435 |                     else args.url
436 |                 ),
437 |             )
438 |         )
439 | 
440 |     if args.wget:
441 |         if args.proxy is not None:
442 |             logging.warning(
443 |                 "Proxies must be set in an environment variable or config file for Wget."
444 |             )
445 | 
446 |         logging.info(
447 |             COMMAND.format(
448 |                 name="Wget",
449 |                 binary="wget",
450 |                 cookies=(
451 |                     cookie_string
452 |                     if args.all_cookies
453 |                     else f'cf_clearance={clearance_cookie["value"]}'
454 |                 ),
455 |                 user_agent=user_agent,
456 |                 url=args.url,
457 |             )
458 |         )
459 | 
460 |     if args.aria2:
461 |         if args.proxy is not None and args.proxy.casefold().startswith("socks"):
462 |             logging.warning("SOCKS proxies are not supported by aria2.")
463 | 
464 |         logging.info(
465 |             COMMAND.format(
466 |                 name="aria2",
467 |                 binary="aria2c",
468 |                 cookies=(
469 |                     cookie_string
470 |                     if args.all_cookies
471 |                     else f'cf_clearance={clearance_cookie["value"]}'
472 |                 ),
473 |                 user_agent=user_agent,
474 |                 url=(
475 |                     f"--all-proxy {args.proxy} {args.url}"
476 |                     if args.proxy is not None
477 |                     else args.url
478 |                 ),
479 |             )
480 |         )
481 | 
482 |     if args.file is None:
483 |         return
484 | 
485 |     logging.info("Writing Cloudflare clearance cookie information to %s...", args.file)
486 | 
487 |     try:
488 |         with open(args.file, encoding="utf-8") as file:
489 |             json_data = json.load(file)
490 |     except (FileNotFoundError, json.JSONDecodeError):
491 |         json_data: Dict[str, List[Dict[str, Any]]] = {}
492 | 
493 |     local_timezone = datetime.now(timezone.utc).astimezone().tzinfo
494 |     unix_timestamp = clearance_cookie["expires"] - timedelta(days=365).total_seconds()
495 |     timestamp = datetime.fromtimestamp(unix_timestamp, tz=local_timezone).isoformat()
496 | 
497 |     json_data.setdefault(clearance_cookie["domain"], []).append(
498 |         {
499 |             "unix_timestamp": int(unix_timestamp),
500 |             "timestamp": timestamp,
501 |             "cf_clearance": clearance_cookie["value"],
502 |             "cookies": all_cookies,
503 |             "user_agent": user_agent,
504 |             "proxy": args.proxy,
505 |         }
506 |     )
507 | 
508 |     with open(args.file, "w", encoding="utf-8") as file:
509 |         json.dump(json_data, file, indent=4)
510 | 
511 | 
512 | if __name__ == "__main__":
513 |     asyncio.run(main())
514 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | latest_user_agents==0.0.5
2 | selenium_authenticated_proxy==1.1.2
3 | user_agents==2.2.0
4 | zendriver==0.5.2
5 | 


--------------------------------------------------------------------------------