66 | Checking if the site connection is secure
67 |
68 |
69 |
70 |
76 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
104 |
106 |
107 |
108 | 0magnet.com needs to review the security of your connection before
109 | proceeding.
110 |
111 |
112 | Did you know the first
113 | botnet in 2003 took over 500-1000 devices? Today, botnets take over millions of devices at
114 | once.
115 |
116 |
118 |
123 |
124 | Requests from malicious bots can pose as legitimate traffic.
125 | Occasionally, you may see this page while the site ensures that the
126 | connection is secure.
66 | Checking if the site connection is secure
67 |
68 |
69 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
92 |
94 |
95 |
96 | 0magnet.com needs to review the security of your connection before
97 | proceeding.
98 |
99 |
100 | Did you know botnets can
101 | be used to shutdown popular websites?
102 |
103 |
105 |
110 |
111 | Requests from malicious bots can pose as legitimate traffic.
112 | Occasionally, you may see this page while the site ensures that the
113 | connection is secure.
67 | Checking if the site connection is secure
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
90 |
92 |
93 |
94 | 0magnet.com needs to review the security of your connection before
95 | proceeding.
96 |
97 |
98 | Did you know bots
99 | historically made up nearly 40% of all internet traffic?
100 |
101 |
103 |
108 |
Requests from malicious bots can
109 | pose as legitimate traffic. Occasionally, you may see this page while the site ensures that the
110 | connection is secure.
111 |
112 |
113 |
Connection
115 | is secure
116 |
Proceeding...
117 |
118 |
129 |
130 |
131 |
151 |
152 |
163 |
164 |
165 |
166 |
167 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "flaresolverr",
3 | "version": "3.0.2",
4 | "description": "Proxy server to bypass Cloudflare protection",
5 | "author": "Diego Heras (ngosang / ngosang@hotmail.es)",
6 | "license": "MIT"
7 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bottle==0.12.23
2 | waitress==2.1.2
3 | selenium==4.7.2
4 | func-timeout==4.3.5
5 | # required by undetected_chromedriver
6 | requests==2.28.1
7 | websockets==10.4
8 | # only required for linux
9 | xvfbwrapper==0.2.9
10 |
--------------------------------------------------------------------------------
/resources/flaresolverr_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rawandahmad698/pyCFSolver/b425a5de0945006a95c80c6f5479f14a24990587/resources/flaresolverr_logo.png
--------------------------------------------------------------------------------
/resources/flaresolverr_logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
181 |
--------------------------------------------------------------------------------
/src/bottle_plugins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rawandahmad698/pyCFSolver/b425a5de0945006a95c80c6f5479f14a24990587/src/bottle_plugins/__init__.py
--------------------------------------------------------------------------------
/src/bottle_plugins/error_plugin.py:
--------------------------------------------------------------------------------
1 | from bottle import response
2 | import logging
3 |
4 |
def error_plugin(callback):
    """
    Bottle plugin to handle exceptions
    https://stackoverflow.com/a/32764250

    Wrap a route callback so that any ``Exception`` it raises is logged and
    replaced by an ``{"error": <message>}`` body with the response status
    set to 500, instead of propagating to the caller.

    :param callback: the route callback to protect.
    :return: the wrapping callable; it forwards all positional and keyword
        arguments to ``callback`` unchanged.
    """
    import functools

    # functools.wraps keeps the callback's __name__/__doc__ on the wrapper,
    # so introspection and log output still identify the real route.
    @functools.wraps(callback)
    def wrapper(*args, **kwargs):
        try:
            actual_response = callback(*args, **kwargs)
        except Exception as e:
            logging.error(str(e))
            actual_response = {
                "error": str(e)
            }
            response.status = 500
        return actual_response

    return wrapper
23 |
--------------------------------------------------------------------------------
/src/bottle_plugins/logger_plugin.py:
--------------------------------------------------------------------------------
1 | from bottle import request, response
2 | import logging
3 |
4 |
def logger_plugin(callback):
    """
    Bottle plugin to use logging module
    http://bottlepy.org/docs/dev/plugindev.html

    Wrap a Bottle request so that a log line is emitted after it's handled.
    (This decorator can be extended to take the desired logger as a param.)

    Requests whose URL ends with ``/health`` are not logged.

    :param callback: the route callback to wrap.
    :return: the wrapping callable; it returns whatever ``callback`` returns.
    """
    import functools

    # functools.wraps keeps the callback's __name__/__doc__ on the wrapper.
    @functools.wraps(callback)
    def wrapper(*args, **kwargs):
        actual_response = callback(*args, **kwargs)
        if not request.url.endswith("/health"):
            # lazy %-style arguments: the message is only built if it is emitted
            logging.info('%s %s %s %s',
                         request.remote_addr,
                         request.method,
                         request.url,
                         response.status)
        return actual_response

    return wrapper
24 |
--------------------------------------------------------------------------------
/src/dtos.py:
--------------------------------------------------------------------------------
1 |
2 | STATUS_OK = "ok"
3 | STATUS_ERROR = "error"
4 |
5 |
class ChallengeResolutionResultT:
    """Plain data holder for the result of a resolved request.

    Every field defaults to ``None`` at class level; the constructor copies
    the keys of the supplied mapping onto the instance as attributes.
    """
    url: str = None
    status: int = None
    headers: list = None
    response: str = None
    cookies: list = None
    userAgent: str = None

    def __init__(self, _dict):
        # overlay the supplied keys directly onto the instance namespace
        vars(self).update(_dict)
16 |
17 |
class ChallengeResolutionT:
    """Status, message and (optional) result of a challenge resolution.

    The constructor copies the supplied mapping onto the instance; when a
    ``result`` entry is present it is wrapped in ``ChallengeResolutionResultT``.
    """
    status: str = None
    message: str = None
    result: ChallengeResolutionResultT = None

    def __init__(self, _dict):
        vars(self).update(_dict)
        raw_result = self.result
        if raw_result is None:
            return
        self.result = ChallengeResolutionResultT(raw_result)
27 |
28 |
class V1RequestBase(object):
    """Parameters of a ``/v1`` request.

    Every field defaults to ``None`` at class level, so a parameter that is
    absent from the request body simply reads as ``None``.
    """
    # V1RequestBase
    cmd: str = None
    cookies: list = None
    maxTimeout: int = None
    proxy: dict = None
    session_ttl_minutes: int = None
    headless: bool = None
    delay: int = None
    beta_args: bool = None
    session: str = None
    headers: list = None  # deprecated v2.0.0, not used
    userAgent: str = None  # deprecated v2.0.0, not used

    # V1Request
    url: str = None
    postData: str = None
    returnOnlyCookies: bool = None
    download: bool = None  # deprecated v2.0.0, not used
    returnRawHtml: bool = None  # deprecated v2.0.0, not used

    def __init__(self, _dict):
        """Copy the keys of ``_dict`` onto the instance.

        ``_dict`` may be ``None`` (e.g. when the request carried no JSON
        body); it is then treated as an empty mapping so that the missing
        parameters are reported by the normal validation instead of an
        opaque ``TypeError`` here.
        """
        if _dict:
            self.__dict__.update(_dict)
52 |
53 |
class V1ResponseBase(object):
    """Body of a ``/v1`` response.

    The constructor copies the supplied mapping onto the instance; when a
    ``solution`` entry is present it is wrapped in
    ``ChallengeResolutionResultT``.
    """
    # V1ResponseBase
    status: str = None
    message: str = None
    startTimestamp: int = None
    endTimestamp: int = None
    version: str = None

    # V1ResponseSolution
    solution: ChallengeResolutionResultT = None

    # hidden vars
    __error_500__: bool = False

    def __init__(self, _dict):
        vars(self).update(_dict)
        raw_solution = self.solution
        if raw_solution is None:
            return
        self.solution = ChallengeResolutionResultT(raw_solution)
72 |
73 |
class IndexResponse(object):
    """Body returned by the index endpoint: message, version and user agent."""
    msg: str = None
    version: str = None
    userAgent: str = None

    def __init__(self, _dict):
        # overlay the supplied keys directly onto the instance namespace
        vars(self).update(_dict)
81 |
82 |
class HealthResponse(object):
    """Body returned by the health endpoint: a single ``status`` field."""
    status: str = None

    def __init__(self, _dict):
        # overlay the supplied keys directly onto the instance namespace
        vars(self).update(_dict)
88 |
--------------------------------------------------------------------------------
/src/flaresolverr.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | import sys
5 |
6 | from bottle import run, response, Bottle, request
7 |
8 | from bottle_plugins.error_plugin import error_plugin
9 | from bottle_plugins.logger_plugin import logger_plugin
10 | from dtos import IndexResponse, V1RequestBase
11 | import flaresolverr_service
12 | import utils
13 |
14 | envi = "prod"
15 |
16 |
class JSONErrorBottle(Bottle):
    """
    Bottle application whose default error handler answers with JSON
    (used for errors such as 404)
    """
    def default_error_handler(self, res):
        # report the error body and status code as a JSON document
        response.content_type = 'application/json'
        payload = {"error": res.body, "status_code": res.status_code}
        return json.dumps(payload)
24 |
25 |
26 | app = JSONErrorBottle()
27 |
28 | # plugin order is important
29 | app.install(logger_plugin)
30 | app.install(error_plugin)
31 |
32 |
@app.route('/')
def index():
    """
    Welcome endpoint: return the service's index response as a dict
    """
    welcome = flaresolverr_service.index_endpoint()
    return utils.object_to_dict(welcome)
40 |
41 |
@app.route('/health')
def health():
    """
    Healthcheck endpoint.
    Unlike the other endpoints, it doesn't print traces
    """
    report = flaresolverr_service.health_endpoint()
    return utils.object_to_dict(report)
50 |
51 |
@app.post('/v1')
def controller_v1():
    """
    Controller v1: hand the JSON body to the service layer and return its
    answer as a dict, with HTTP status 500 when the service flags an error
    """
    payload = V1RequestBase(request.json)
    outcome = flaresolverr_service.controller_v1_endpoint(payload)
    if outcome.__error_500__:
        response.status = 500
    return utils.object_to_dict(outcome)
62 |
63 |
if __name__ == "__main__":
    # validate configuration
    log_level = os.environ.get('LOG_LEVEL', 'info').upper()
    # NOTE(review): the two results below are not used in this block;
    # presumably the calls exist to fail fast on invalid settings - confirm in utils
    log_html = utils.get_config_log_html()
    headless = utils.get_config_headless()
    server_host = os.environ.get('HOST', '0.0.0.0')
    # The PORT environment variable is honoured in every environment (like
    # HOST); only the fallback differs: 8191 in "dev", 8192 otherwise.
    default_port = 8191 if envi == "dev" else 8192
    port = int(os.environ.get('PORT', default_port))
    server_port = port

    # configure logger
    logger_format = '%(asctime)s %(levelname)-8s %(message)s'
    if log_level == 'DEBUG':
        # debug output additionally carries the thread id as a request id
        logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s'

    logging.basicConfig(
        format=logger_format,
        level=log_level,
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=[
            logging.StreamHandler(sys.stdout)
        ]
    )
    # disable warning traces from urllib3
    logging.getLogger('urllib3').setLevel(logging.ERROR)
    logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING)
    logging.getLogger('undetected_chromedriver').setLevel(logging.WARNING)

    logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
    logging.debug('Debug log enabled')

    # test browser installation
    flaresolverr_service.test_browser_installation()

    # start webserver
    # default server 'wsgiref' does not support concurrent requests
    run(app, host=server_host, port=server_port, quiet=True, server='waitress')
100 |
--------------------------------------------------------------------------------
/src/flaresolverr_service.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import platform
3 | import sys
4 | import time
5 | import traceback
6 | from datetime import timedelta
7 | from urllib.parse import unquote
8 |
9 | from func_timeout import FunctionTimedOut, func_timeout
10 | # Import timeout exception from selenium
11 | from selenium.common.exceptions import TimeoutException
12 | from selenium.webdriver.chrome.webdriver import WebDriver
13 | from selenium.webdriver.common.by import By
14 | from selenium.webdriver.support.expected_conditions import (
15 | presence_of_element_located, staleness_of, title_is)
16 | from selenium.webdriver.common.action_chains import ActionChains
17 | from selenium.webdriver.support.wait import WebDriverWait
18 |
19 | import utils
20 | from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
21 | ChallengeResolutionT, HealthResponse, IndexResponse,
22 | V1RequestBase, V1ResponseBase)
23 |
24 | from flaresolverr import envi
25 |
26 | from sessions import SessionsStorage
27 |
28 | ACCESS_DENIED_TITLES = [
29 | # Cloudflare
30 | 'Access denied',
31 | # Cloudflare http://bitturk.net/ Firefox
32 | 'Attention Required! | Cloudflare'
33 | ]
34 | ACCESS_DENIED_SELECTORS = [
35 | # Cloudflare
36 | 'div.cf-error-title span.cf-code-label span',
37 | # Cloudflare http://bitturk.net/ Firefox
38 | '#cf-error-details div.cf-error-overview h1'
39 | ]
40 | CHALLENGE_TITLES = [
41 | # Cloudflare
42 | 'Just a moment...',
43 | # DDoS-GUARD
44 | 'DDoS-Guard'
45 | ]
46 | CHALLENGE_SELECTORS = [
47 | # Cloudflare
48 | '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js',
49 | # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
50 | 'td.info #js_info',
51 | # Fairlane / pararius.com
52 | 'div.vc div.text-box h2'
53 | ]
54 | SHORT_TIMEOUT = 10
55 | SESSIONS_STORAGE = SessionsStorage()
56 |
57 |
def test_browser_installation():
    """Log the platform, browser path, browser version and user agent.

    Exits the process with status 1 when no browser path or no browser
    major version can be determined.
    """
    logging.info("Testing web browser installation...")
    logging.info("Platform: " + platform.platform())

    browser_path = utils.get_chrome_exe_path()
    if browser_path is None:
        logging.error("Chrome / Chromium web browser not installed!")
        sys.exit(1)
    logging.info("Chrome / Chromium path: " + browser_path)

    browser_major = utils.get_chrome_major_version()
    if browser_major == '':
        logging.error("Chrome / Chromium version not detected!")
        sys.exit(1)
    logging.info("Chrome / Chromium major version: " + browser_major)

    logging.info("Launching web browser...")
    logging.info("FlareSolverr User-Agent: " + utils.get_user_agent())
    logging.info("Test successful!")
80 |
81 |
def index_endpoint() -> IndexResponse:
    """Build the welcome response: ready message, version and user agent."""
    return IndexResponse({
        "msg": "FlareSolverr is ready!",
        "version": utils.get_flaresolverr_version(),
        "userAgent": utils.get_user_agent(),
    })
88 |
89 |
def health_endpoint() -> HealthResponse:
    """Build the healthcheck response, whose status is always ``STATUS_OK``."""
    return HealthResponse({"status": STATUS_OK})
94 |
95 |
def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase:
    """Run a ``/v1`` request and return a timestamped, versioned response.

    Any exception raised by the command handler is converted into an error
    response flagged with ``__error_500__`` rather than propagated.
    """
    started_at = int(time.time() * 1000)
    logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}")

    # headless mode is forced in the "dev" environment
    if envi == "dev":
        req.headless = True

    try:
        reply = _controller_v1_handler(req)
    except Exception as e:
        reply = V1ResponseBase({})
        reply.__error_500__ = True
        reply.status = STATUS_ERROR
        reply.message = "X=Error: " + str(e)
        logging.error(reply.message)
        # dump the traceback of the failure to stdout
        print(traceback.format_exc())

    reply.startTimestamp = started_at
    reply.endTimestamp = int(time.time() * 1000)
    reply.version = utils.get_flaresolverr_version()
    logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(reply)}")
    logging.info(f"Response in {(reply.endTimestamp - reply.startTimestamp) / 1000} s")
    return reply
122 |
123 |
def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
    """Validate the request, apply defaults and dispatch on ``req.cmd``.

    Raises ``Exception`` when ``cmd`` is missing or not a known command.
    """
    # do some validations
    if req.cmd is None:
        raise Exception("Request parameter 'cmd' is mandatory.")
    if req.headers is not None:
        logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.")
    if req.userAgent is not None:
        logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")

    # set default values
    if req.maxTimeout is None or req.maxTimeout < 1:
        req.maxTimeout = 60000

    # execute the command; names are compared with == (not hashed) so that
    # any value of cmd ends up in the "invalid" error below
    commands = (
        ('sessions.create', _cmd_sessions_create),
        ('sessions.list', _cmd_sessions_list),
        ('sessions.destroy', _cmd_sessions_destroy),
        ('request.get', _cmd_request_get),
        ('request.post', _cmd_request_post),
    )
    for command_name, command in commands:
        if req.cmd == command_name:
            return command(req)

    raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.")
153 |
154 |
def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase:
    """Handle ``request.get``: validate, resolve the challenge, wrap the result.

    Raises ``Exception`` when ``url`` is missing or ``postData`` is given.
    Returns an error response when the challenge resolution yields nothing.
    """
    # do some validations
    if req.url is None:
        raise Exception("Request parameter 'url' is mandatory in 'request.get' command.")
    if req.postData is not None:
        raise Exception("Cannot use 'postBody' when sending a GET request.")
    if req.returnRawHtml is not None:
        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
    if req.download is not None:
        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")

    if req.headless:
        logging.info("Headless mode is enabled.")

    resolution = _resolve_challenge(req, 'GET')
    if resolution is None:
        return V1ResponseBase({
            "status": STATUS_ERROR,
            "message": "An error occurred while resolving the challenge.",
        })

    reply = V1ResponseBase({})
    reply.status = resolution.status
    reply.message = resolution.message
    # assigned as an attribute: the result is already an object, not a dict
    reply.solution = resolution.result
    return reply
181 |
182 |
def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
    """Handle ``request.post``: validate, resolve the challenge, wrap the result.

    Raises ``Exception`` when ``postData`` is missing.
    Returns an error response when the challenge resolution yields nothing,
    mirroring what ``_cmd_request_get`` does in that situation.
    """
    # do some validations
    if req.postData is None:
        raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.")
    if req.returnRawHtml is not None:
        logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
    if req.download is not None:
        logging.warning("Request parameter 'download' was removed in FlareSolverr v2.")

    challenge_res = _resolve_challenge(req, 'POST')
    if challenge_res is None:
        # _resolve_challenge reports a failed resolution by returning None;
        # without this guard the attribute reads below raise AttributeError
        res = V1ResponseBase({})
        res.status = STATUS_ERROR
        res.message = "An error occurred while resolving the challenge."
        return res

    res = V1ResponseBase({})
    res.status = challenge_res.status
    res.message = challenge_res.message
    res.solution = challenge_res.result
    return res
198 |
199 |
def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
    """Handle ``sessions.create``: create (or reuse) the requested session.

    The response message tells whether the session was newly created or
    already existed; both cases answer with ``STATUS_OK``.
    """
    logging.debug("Creating new session...")
    session, fresh = SESSIONS_STORAGE.create(req=req, session_id=req.session)

    if fresh:
        outcome_message = "Session created successfully."
    else:
        outcome_message = "Session already exists."

    return V1ResponseBase({
        "status": STATUS_OK,
        "message": outcome_message,
        "session": session.session_id
    })
217 |
218 |
def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
    """Handle ``sessions.list``: return the ids of all stored sessions.

    ``req`` is accepted for signature parity with the other commands and
    is not read.
    """
    return V1ResponseBase({
        "status": STATUS_OK,
        "message": "",
        "sessions": SESSIONS_STORAGE.session_ids()
    })
227 |
228 |
def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
    """Handle ``sessions.destroy``: remove the session named by ``req.session``.

    Raises ``Exception`` when the storage reports that no such session
    existed.
    """
    try:
        target_id = req.session
    except Exception as e:
        print("Error: " + str(e))
        return V1ResponseBase({
            "status": STATUS_ERROR,
            "message": str(e)
        })

    if not SESSIONS_STORAGE.destroy(target_id):
        raise Exception("The session doesn't exist.")

    logging.info(f"Session destroyed (session_id={target_id})")

    return V1ResponseBase({
        "status": STATUS_OK,
        "message": "The session has been removed."
    })
250 |
251 |
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
    """Obtain a webdriver and run ``_evil_logic`` under the request's timeout.

    When ``req.session`` is set the driver of that session is used (the
    session is created or recreated by the storage as needed) and left
    running afterwards; otherwise a throw-away driver is created and quit
    before returning.

    :param req: the request; ``maxTimeout`` is read in milliseconds.
    :param method: ``'GET'`` or ``'POST'``, forwarded to ``_evil_logic``.
    :return: the resolution, or ``None`` when resolving failed with an
        error other than a timeout (the error is logged with its traceback).
    :raises Exception: when the resolution does not finish within the timeout.
    """
    timeout = req.maxTimeout / 1000
    driver = None
    try:
        if req.session:
            session_id = req.session
            ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
            logging.debug(f"Trying to get session (session_id={session_id}, ttl={str(ttl)})")
            if session_id in SESSIONS_STORAGE.is_being_created:
                # the storage blocks in get() until the pending creation ends
                logging.info(f"Waiting for session to be created (session_id={session_id})")

            session, fresh = SESSIONS_STORAGE.get(session_id, ttl, req=req)
            if fresh:
                logging.info(f"new session created to perform the request (session_id={session_id})")
            else:
                logging.info(f"existing session is used to perform the request (session_id={session_id}, "
                             f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")

            driver = session.driver
        else:
            driver = utils.get_webdriver(req=req)
            logging.debug('New instance of webdriver has been created to perform the request')
        return func_timeout(timeout, _evil_logic, (req, driver, method))
    except FunctionTimedOut:
        raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
    except Exception as e:
        # logging.exception records the message together with the full
        # traceback; callers detect the failure through the None result
        logging.exception('Error solving the challenge. ' + str(e))
        return None
    finally:
        # session drivers are kept alive for reuse; only throw-away drivers are quit
        if not req.session:
            if driver:
                driver.quit()
                logging.debug('A used instance of webdriver has been destroyed')
            else:
                logging.debug('No instance of webdriver has been created to perform the request')
299 |
300 |
def click_verify(driver: WebDriver):
    """Try to click the Cloudflare verification controls, then pause 2 s.

    First the checkbox inside the challenge iframe is attempted, then the
    'Verify you are human' button on the main document. A failure of
    either attempt is logged and otherwise ignored.
    """
    def _move_and_click(element):
        # move to an offset within the element, then click it
        chain = ActionChains(driver)
        chain.move_to_element_with_offset(element, 5, 7)
        chain.click(element)
        chain.perform()

    try:
        logging.debug("Try to find the Cloudflare verify checkbox")
        challenge_frame = driver.find_element(
            By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']")
        driver.switch_to.frame(challenge_frame)
        verify_checkbox = driver.find_element(
            by=By.XPATH,
            value='//*[@id="challenge-stage"]/div/label/input',
        )
        if verify_checkbox:
            _move_and_click(verify_checkbox)
            logging.debug("Cloudflare verify checkbox found and clicked")
    except Exception as e:
        logging.exception(e)
        logging.debug("Cloudflare verify checkbox not found on the page")
    finally:
        # always leave the iframe again, whether or not the click worked
        driver.switch_to.default_content()

    try:
        logging.debug("Try to find the Cloudflare 'Verify you are human' button")
        verify_button = driver.find_element(
            by=By.XPATH,
            value="//input[@type='button' and @value='Verify you are human']",
        )
        if verify_button:
            _move_and_click(verify_button)
            logging.debug("The Cloudflare 'Verify you are human' button found and clicked")
    except Exception:
        logging.debug("The Cloudflare 'Verify you are human' button not found on the page")

    time.sleep(2)
339 |
340 |
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
    """Load the requested page, wait out any challenge and collect the result.

    Steps: navigate (GET or POST), optionally sleep ``req.delay`` seconds,
    fail if an access-denied page is shown, detect a challenge by title or
    selector, wait until it disappears (clicking the verify controls after
    each timeout), then gather URL, cookies, user agent and - unless
    ``req.returnOnlyCookies`` is set - headers and page source.

    Raises ``Exception`` when an access-denied page is detected.
    """
    outcome = ChallengeResolutionT({})
    outcome.status = STATUS_OK
    outcome.message = ""

    # navigate to the page
    logging.debug(f'Navigating to... {req.url}')
    if method == 'POST':
        _post_request(req, driver)
    else:
        driver.get(req.url)
    if utils.get_config_log_html():
        logging.debug(f"Response HTML:\n{driver.page_source}")

    # optional pause requested by the caller; int() leaves an int unchanged
    if req.delay:
        time.sleep(int(req.delay))

    # wait for the page
    root_element = driver.find_element(By.TAG_NAME, "html")
    current_title = driver.title

    # find access denied titles
    for denied_title in ACCESS_DENIED_TITLES:
        if denied_title != current_title:
            continue
        logging.info("Access denied detected. Refreshing page...")
        # give the page one refresh before giving up
        driver.refresh()
        time.sleep(3)
        current_title = driver.title
        if denied_title == current_title:
            raise Exception('Cloudflare has blocked this request. '
                            'Probably your IP is banned for this site, check in your web browser.')

    # find access denied selectors
    for denied_selector in ACCESS_DENIED_SELECTORS:
        if len(driver.find_elements(By.CSS_SELECTOR, denied_selector)) > 0:
            raise Exception('Cloudflare has blocked this request. '
                            'Probably your IP is banned for this site, check in your web browser.')

    # find challenge by title
    challenge_found = False
    for known_title in CHALLENGE_TITLES:
        if len(current_title) == 0:
            logging.info("Challenge detected. Title is empty")
            challenge_found = True
            break
        if known_title.lower() == current_title.lower():
            logging.info("Challenge detected. Title found: " + current_title)
            challenge_found = True
            break

    if not challenge_found:
        # find challenge by selectors
        for known_selector in CHALLENGE_SELECTORS:
            if len(driver.find_elements(By.CSS_SELECTOR, known_selector)) > 0:
                logging.info("Challenge detected. Selector found: " + known_selector)
                challenge_found = True
                break

    attempt = 0
    if challenge_found:
        while True:
            attempt += 1
            try:
                # wait until the title changes
                for known_title in CHALLENGE_TITLES:
                    logging.debug(f"Waiting for title (attempt {attempt}): {known_title}")
                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(known_title))

                # then wait until all the selectors disappear
                for known_selector in CHALLENGE_SELECTORS:
                    logging.debug(f"Waiting for selector (attempt {attempt}): {known_selector}")
                    WebDriverWait(driver, SHORT_TIMEOUT).until_not(
                        presence_of_element_located((By.CSS_SELECTOR, known_selector)))

                # all elements not found
                break

            except TimeoutException:
                logging.debug("Timeout waiting for selector")

                click_verify(driver)

                # update the html (cloudflare reloads the page every 5 s)
                root_element = driver.find_element(By.TAG_NAME, "html")

        # waits until cloudflare redirection ends
        logging.debug("Waiting for redirect")
        # noinspection PyBroadException
        try:
            WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(root_element))
        except Exception:
            logging.debug("Timeout waiting for redirect")

        logging.info("Challenge solved!")
        outcome.message = "Challenge solved!"
    else:
        logging.info("Challenge not detected!")
        outcome.message = "Challenge not detected!"

    solved = ChallengeResolutionResultT({})
    solved.url = driver.current_url
    solved.status = 200  # todo: fix, selenium not provides this info
    solved.cookies = driver.get_cookies()
    solved.userAgent = utils.get_user_agent(driver)

    if not req.returnOnlyCookies:
        solved.headers = {}  # todo: fix, selenium not provides this info
        solved.response = driver.page_source

    outcome.result = solved
    return outcome
458 |
459 |
460 | def _post_request(req: V1RequestBase, driver: WebDriver):
461 | post_form = f''
480 | html_content = f"""
481 |
482 |
483 |
484 | {post_form}
485 |
486 |
487 | """
488 | driver.get("data:text/html;charset=utf-8," + html_content)
489 |
--------------------------------------------------------------------------------
/src/sessions.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from dataclasses import dataclass
3 | from datetime import datetime, timedelta
4 | from typing import Optional, Tuple
5 | from uuid import uuid1
6 |
7 | from selenium.webdriver.chrome.webdriver import WebDriver
8 |
9 | import utils
10 | from dtos import V1RequestBase
11 |
12 |
@dataclass
class Session:
    """A stored browser session: its id, its driver and its creation time."""
    session_id: str
    driver: WebDriver
    created_at: datetime

    def lifetime(self) -> timedelta:
        """Return how long ago the session was created."""
        now = datetime.now()
        return now - self.created_at
21 |
22 |
class SessionsStorage:
    """SessionsStorage creates, stores and process all the sessions"""

    def __init__(self):
        # session_id -> Session
        self.sessions = {}
        # session_id -> WebDriver of that session
        self.drivers = {}
        # ids that have been requested and not destroyed since
        self.real_sessions = []
        # ids whose WebDriver is being started right now
        self.is_being_created = []

    def create(self, req: V1RequestBase = None, session_id: Optional[str] = None, force_new: Optional[bool] = False) -> Tuple[Session, bool]:
        """create creates new instance of WebDriver if necessary,
        assign defined (or newly generated) session_id to the instance
        and returns the session object. If a new session has been created
        second argument is set to True.

        Note: The function is idempotent, so in case if session_id
        already exists in the storage a new instance of WebDriver won't be created
        and existing session will be returned. Second argument defines if
        new session has been created (True) or an existing one was used (False).

        When another caller is already creating the same session_id, this
        call waits for that creation to end and reuses its session; if that
        creation failed, this call creates the session itself.
        """
        import time

        session_id = session_id or str(uuid1())

        if force_new:
            self.destroy(session_id)

        if self.exists(session_id):
            return self.sessions[session_id], False

        if session_id in self.real_sessions:
            # Wait for a pending creation; sleep between checks instead of
            # spinning at full speed.
            while session_id in self.is_being_created:
                time.sleep(0.05)

            if session_id in self.sessions:
                return self.sessions[session_id], False
        else:
            self.real_sessions.append(session_id)
        self.is_being_created.append(session_id)

        try:
            if req is not None:
                driver = utils.get_webdriver(req)
            else:
                driver = utils.get_webdriver()

            created_at = datetime.now()
            session = Session(session_id, driver, created_at)

            self.sessions[session_id] = session
            self.drivers[session_id] = driver
        finally:
            # Always clear the marker, even when the driver failed to start;
            # otherwise every later caller for this id would wait forever.
            self.is_being_created.remove(session_id)

        return session, True

    def exists(self, session_id: str) -> bool:
        """Return True if a session with this id is stored."""
        return session_id in self.sessions

    def destroy(self, session_id: str) -> bool:
        """destroy closes the driver instance and removes session from the storage.
        The function is noop if session_id doesn't exist.
        The function returns True if session was found and destroyed,
        and False if session_id wasn't found.
        """
        if not self.exists(session_id):
            return False

        self.sessions.pop(session_id)

        # Check if session_id is in the drivers dict
        if session_id in self.drivers:
            driver = self.drivers.pop(session_id)
            driver.quit()
            del driver

        if session_id in self.real_sessions:
            self.real_sessions.remove(session_id)

        return True

    def get(self, session_id: str, ttl: Optional[timedelta] = None, req: V1RequestBase = None) -> Tuple[Session, bool]:
        """Return the session for session_id, creating it when missing.

        When ttl is given and an existing session is older than ttl, the
        session is destroyed and recreated. req, when given, is forwarded
        to every creation so the new driver is configured from the request.
        The second element of the result is True when a session was created.
        """
        session, fresh = self.create(req=req, session_id=session_id)

        if ttl is not None and not fresh and session.lifetime() > ttl:
            logging.info(f'Session\'s lifetime has expired, so the session is being recreated (session_id={session_id})')
            session, fresh = self.create(req=req, session_id=session_id, force_new=True)

        return session, fresh

    def session_ids(self) -> list[str]:
        """Return the ids of all stored sessions."""
        return list(self.sessions.keys())
119 |
--------------------------------------------------------------------------------
/src/tests.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import json
3 | import threading
4 | import time
5 |
6 | import requests
7 |
def _post_json(json_data):
    """POST json_data as a JSON body to the local /v1 endpoint; return the response."""
    return requests.request(
        "POST",
        "http://localhost:8192/v1",
        headers={'Content-Type': 'application/json'},
        data=json.dumps(json_data),
    )
16 |
17 |
def test_session_create():
    """Fire three concurrent sessions.create requests for the same session id."""
    session_count = 3
    for _ in range(session_count):
        worker = threading.Thread(target=_post_json, args=({
            "cmd": "sessions.create",
            "session": "1",
            "url": "http://www.soleretriever.com",
            "maxTimeout": 60000,
            "headless": True
        },))
        worker.start()

    print(">> Created", session_count, "sessions.")
31 |
def test_browser_request():
    """Send one ``request.get`` through session "1" and print the raw response text."""
    response = _post_json({
        "cmd": "request.get",
        "url": "http://www.google.com",
        "session": "1",
        "maxTimeout": 60000,
        "headless": True,
        "returnOnlyCookies": True
    })
    print(response.text)
43 |
def test_ttl():
    """Post the same ``request.get`` every 10 seconds until more than 80 seconds passed.

    The request carries ``session_ttl_minutes: 1``, so running for longer than
    a minute sends requests both before and after that TTL has elapsed.
    """
    # A monotonic clock is used for the elapsed time: it cannot jump when the
    # wall clock is adjusted, and unlike ``timedelta.seconds`` the difference
    # is the real total, not just the seconds component of a duration.
    started = time.monotonic()
    cmd = {
        "cmd": "request.get",
        "session": "1",
        "url":"http://www.google.com",
        "session_ttl_minutes": 1,
        "maxTimeout": 60000,
        "headless": True,
        "returnOnlyCookies": True
    }
    while True:
        # Fire-and-forget: the response is not inspected here.
        threading.Thread(target=_post_json, args=(cmd,)).start()
        time.sleep(10)
        elapsed = int(time.monotonic() - started)
        print(">>", elapsed, "seconds elapsed")
        if elapsed > 80:
            break
61 |
62 | if __name__ == "__main__":
63 | test_session_create()
64 | test_browser_request()
65 | test_ttl()
--------------------------------------------------------------------------------
/src/tests_sites.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from webtest import TestApp
4 |
5 | from dtos import V1ResponseBase, STATUS_OK
6 | import flaresolverr
7 | import utils
8 |
9 |
10 | def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None:
11 | for obj in _list:
12 | if obj[key] == value:
13 | return obj
14 | return None
15 |
16 |
def asset_cloudflare_solution(self, res, site_url, site_text):
    """Assert that ``res`` is a successful, solved-challenge ``/v1`` response.

    Args:
        self: The ``unittest.TestCase`` whose assertion methods are used.
        res: Response object exposing ``status_code`` and ``json``.
        site_url: URL expected to be contained in the solution URL.
        site_text: Text expected to be contained in the solution body.
    """
    self.assertEqual(res.status_code, 200)

    body = V1ResponseBase(res.json)
    self.assertEqual(STATUS_OK, body.status)
    self.assertEqual("Challenge solved!", body.message)
    self.assertGreater(body.startTimestamp, 10000)
    self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
    self.assertEqual(utils.get_flaresolverr_version(), body.version)

    solution = body.solution
    self.assertIn(site_url, solution.url)
    self.assertEqual(solution.status, 200)
    # Compare by value: ``assertIs`` on an int only passes thanks to
    # CPython's small-integer caching, which is an implementation detail.
    self.assertEqual(len(solution.headers), 0)
    self.assertIn(site_text, solution.response)
    self.assertGreater(len(solution.cookies), 0)
    self.assertIn("Chrome/", solution.userAgent)

    cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies)
    self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found")
    self.assertGreater(len(cf_cookie["value"]), 30)
38 |
39 |
class TestFlareSolverr(unittest.TestCase):
    """Checks that the ``/v1`` endpoint returns solved pages for the listed sites."""

    app = TestApp(flaresolverr.app)

    def test_v1_endpoint_request_get_cloudflare(self):
        """Run ``request.get`` against every listed site, one sub-test per site."""
        # Each entry: (sub-test name, URL to fetch, text expected in the response)
        sites_get = [
            ('nowsecure', 'https://nowsecure.nl', 'nowSecure'),
            ('0magnet', 'https://0magnet.com/search?q=2022', 'Torrent Search - ØMagnet'),
            ('1337x', 'https://1337x.unblockit.cat/cat/Movies/time/desc/1/', ''),
            ('avistaz', 'https://avistaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
             'Access denied'),
            ('badasstorrents', 'https://badasstorrents.com/torrents/search/720p/date/desc',
             'Latest Torrents - BadassTorrents'),
            ('bt4g', 'https://bt4g.org/search/2022', 'Download 2022 Torrents - BT4G'),
            ('cinemaz', 'https://cinemaz.to/api/v1/jackett/torrents?in=1&type=0&search=',
             'Access denied'),
            ('epublibre', 'https://epublibre.unblockit.cat/catalogo/index/0/nuevo/todos/sin/todos/--/ajax',
             'epublibre - catálogo'),
            ('ext', 'https://ext.to/latest/?order=age&sort=desc',
             'Download Latest Torrents - EXT Torrents'),
            ('extratorrent', 'https://extratorrent.st/search/?srt=added&order=desc&search=720p&new=1&x=0&y=0',
             'Page 1 - ExtraTorrent'),
            ('idope', 'https://idope.se/browse.html', 'Recent Torrents'),
            ('limetorrents', 'https://limetorrents.unblockninja.com/latest100',
             'Latest 100 torrents - LimeTorrents'),
            ('privatehd', 'https://privatehd.to/api/v1/jackett/torrents?in=1&type=0&search=',
             'Access denied'),
            ('torrentcore', 'https://torrentcore.xyz/index', 'Torrent[CORE] - Torrent community.'),
            ('torrentqq223', 'https://torrentqq223.com/torrent/newest.html', 'https://torrentqq223.com/ads/'),
            ('36dm', 'https://www.36dm.club/1.html', 'https://www.36dm.club/yesterday-1.html'),
            ('erai-raws', 'https://www.erai-raws.info/feed/?type=magnet', '403 Forbidden'),
            ('teamos', 'https://www.teamos.xyz/torrents/?filename=&freeleech=',
             'Log in | Team OS : Your Only Destination To Custom OS !!'),
            ('yts', 'https://yts.unblockninja.com/api/v2/list_movies.json?query_term=&limit=50&sort=date_added',
             '{"movie_count":')
        ]
        for name, url, expected_text in sites_get:
            payload = {"cmd": "request.get", "url": url}
            with self.subTest(msg=name):
                response = self.app.post_json('/v1', payload)
                asset_cloudflare_solution(self, response, url, expected_text)

    def test_v1_endpoint_request_post_cloudflare(self):
        """Run ``request.post`` against every listed site, one sub-test per site."""
        # Each entry: (sub-test name, URL, text expected in the response, form body)
        sites_post = [
            ('nnmclub', 'https://nnmclub.to/forum/tracker.php', 'Трекер :: NNM-Club',
             'prev_sd=0&prev_a=0&prev_my=0&prev_n=0&prev_shc=0&prev_shf=1&prev_sha=1&prev_shs=0&prev_shr=0&prev_sht=0&f%5B%5D=-1&o=1&s=2&tm=-1&shf=1&sha=1&ta=-1&sns=-1&sds=-1&nm=&pn=&submit=%CF%EE%E8%F1%EA')
        ]

        for name, url, expected_text, form_body in sites_post:
            payload = {"cmd": "request.post", "url": url, "postData": form_body}
            with self.subTest(msg=name):
                response = self.app.post_json('/v1', payload)
                asset_cloudflare_solution(self, response, url, expected_text)
97 |
98 |
99 | if __name__ == '__main__':
100 | unittest.main()
101 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 |
5 | 888 888 d8b
6 | 888 888 Y8P
7 | 888 888
8 | .d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
9 | d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
10 | 888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
11 | Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
12 | "Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
13 |
14 | by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
15 |
16 | """
17 | from __future__ import annotations
18 |
19 | __version__ = "3.5.3"
20 |
21 | import json
22 | import logging
23 | import os
24 | import pathlib
25 | import re
26 | import shutil
27 | import subprocess
28 | import sys
29 | import tempfile
30 | import time
31 | from weakref import finalize
32 |
33 | import selenium.webdriver.chrome.service
34 | import selenium.webdriver.chrome.webdriver
35 | from selenium.webdriver.common.by import By
36 | import selenium.webdriver.chromium.service
37 | import selenium.webdriver.remote.command
38 | import selenium.webdriver.remote.webdriver
39 |
40 | from .cdp import CDP
41 | from .dprocess import start_detached
42 | from .options import ChromeOptions
43 | from .patcher import IS_POSIX
44 | from .patcher import Patcher
45 | from .reactor import Reactor
46 | from .webelement import UCWebElement
47 | from .webelement import WebElement
48 |
49 | __all__ = (
50 | "Chrome",
51 | "ChromeOptions",
52 | "Patcher",
53 | "Reactor",
54 | "CDP",
55 | "find_chrome_executable",
56 | )
57 |
58 | logger = logging.getLogger("uc")
59 | logger.setLevel(logging.getLogger().getEffectiveLevel())
60 |
61 |
62 | class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
63 | """
64 |
65 | Controls the ChromeDriver and allows you to drive the browser.
66 |
67 | The webdriver file will be downloaded by this module automatically,
68 | you do not need to specify this. however, you may if you wish.
69 |
70 | Attributes
71 | ----------
72 |
73 | Methods
74 | -------
75 |
76 | reconnect()
77 |
78 | this can be useful in case of heavy detection methods
79 | -stops the chromedriver service which runs in the background
80 | -starts the chromedriver service which runs in the background
81 | -recreate session
82 |
83 |
84 | start_session(capabilities=None, browser_profile=None)
85 |
86 | differentiates from the regular method in that it does not
87 | require a capabilities argument. The capabilities are automatically
88 | recreated from the options at creation time.
89 |
90 | --------------------------------------------------------------------------
91 | NOTE:
92 | Chrome has everything included to work out of the box.
93 | it does not `need` customizations.
94 | any customizations MAY lead to trigger bot mitigation systems.
95 |
96 | --------------------------------------------------------------------------
97 | """
98 |
99 | _instances = set()
100 | session_id = None
101 | debug = False
102 |
def __init__(
    self,
    options=None,
    user_data_dir=None,
    driver_executable_path=None,
    browser_executable_path=None,
    port=0,
    enable_cdp_events=False,
    # service_args=None,
    # service_creationflags=None,
    desired_capabilities=None,
    advanced_elements=False,
    # service_log_path=None,
    keep_alive=True,
    log_level=0,
    headless=False,
    version_main=None,
    patcher_force_close=False,
    suppress_welcome=True,
    use_subprocess=True,
    debug=False,
    no_sandbox=True,
    user_multi_procs: bool = False,
    **kw,
):
    """
    Creates a new instance of the chrome driver.

    Patches the chromedriver binary, launches the browser process itself and
    then attaches a selenium WebDriver to it through the debugger address.

    Parameters
    ----------

    options: ChromeOptions, optional, default: None - automatic useful defaults
        this takes an instance of ChromeOptions, mainly to customize browser behavior.
        anything other than the default, for example extensions or startup options
        are not supported in case of failure, and can probably lower your undetectability.
        A ChromeOptions instance cannot be reused for a second driver.

    user_data_dir: str , optional, default: None (creates temp profile)
        if user_data_dir is a path to a valid chrome profile directory, use it,
        and turn off automatic removal mechanism at exit.

    driver_executable_path: str, optional, default: None(=downloads and patches new binary)

    browser_executable_path: str, optional, default: None - use find_chrome_executable
        Path to the browser executable.
        If not specified, make sure the executable's folder is in $PATH

    port: int, optional, default: 0
        used as the remote debugging port of the browser when options carries no
        debugger_address. the default value of 0 automatically picks an available port.
        leave it at 0 unless you know what you are doing.

    enable_cdp_events: bool, default: False
        :: currently for chrome only
        this enables the handling of wire messages
        when enabled, you can subscribe to CDP events by using:

            driver.add_cdp_listener("Network.dataReceived", yourcallback)
            # yourcallback is a callable which accepts exactly 1 dict as parameter

    desired_capabilities: dict, optional, default: None - auto from config
        Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
        NOTE(review): the value is not forwarded anywhere in this constructor.

    advanced_elements: bool, optional, default: False
        makes it easier to recognize elements like you know them from html/browser inspection, especially when working
        in an interactive environment

        default webelement repr:
        <selenium.webdriver.remote.webelement.WebElement (session="...", element="...")>

        advanced webelement repr
        <WebElement(<a class="..." href="#" id="...">)>

        note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.

    keep_alive: bool, optional, default: True
        Whether to configure ChromeRemoteConnection to use HTTP keep-alive.

    log_level: int, optional, default: adapts to python global log level
        a value of 0 derives the level from the root logger's effective level.

    headless: bool, optional, default: False
        can also be specified in the options instance.
        Specify whether you want to use the browser in headless mode.
        warning: this lowers undetectability and not fully supported.

    version_main: int, optional, default: None (=auto)
        if you, for whatever reason, use an older version of Chrome,
        you can specify its full rounded version number here.
        Example: 87 for all versions of 87

    patcher_force_close: bool, optional, default: False
        instructs the patcher to do whatever it can to access the chromedriver binary
        if the file is locked, it will force shutdown all instances.
        setting it is not recommended, unless you know the implications and think
        you might need it.

    suppress_welcome: bool, optional , default: True
        a "welcome" alert might show up on *nix-like systems asking whether you want to set
        chrome as your default browser, and if you want to send even more data to google.
        set this to False if you want to see that dialog.
        Note: if you don't handle the nag screen in time, the browser loses its connection and throws an Exception.

    use_subprocess: bool, optional , default: True,

        False makes sure Chrome will get its own detached process (so no subprocess of chromedriver.exe or python).
        This fixes a LOT of issues, like multithreaded run, but most importantly, shutting down correctly after
        the program exits or when using .quit()
        you should know what you're doing, and know how python works.

        unfortunately, there is always an edge case in which one would like to write a single script with the only contents being:
        --start script--
        import undetected_chromedriver as uc
        d = uc.Chrome()
        d.get('https://somesite/')
        ---end script --

        and will be greeted with an error, since the program exits before chrome has a chance to launch.
        in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
        ! setting it to True comes with NO support when being detected. !

    debug: bool, optional, default: False
        stored on the instance as ``self.debug``.

    no_sandbox: bool, optional, default=True
        uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
        this option has a default of True since many people seem to run this as root (....) , and chrome does not start
        when running as root without using --no-sandbox flag.

    user_multi_procs:
        set to true when you are using multithreads/multiprocessing
        ensures not all processes are trying to modify a binary which is in use by another.
        for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
        this requirement can be easily satisfied, by just running this program "normal" and close/kill it.

    **kw:
        accepted for call compatibility; not used by this constructor.

    Raises
    ------
    RuntimeError
        if ``options`` was already used to create another driver.
    FileNotFoundError
        if no browser executable could be located.
    """

    # NOTE(review): the finalizer is handed ``self`` as an argument, which
    # keeps a strong reference to the instance inside the finalizer itself.
    finalize(self, self._ensure_close, self)
    self.debug = debug
    self.patcher = Patcher(
        executable_path=driver_executable_path,
        force=patcher_force_close,
        version_main=version_main,
        user_multi_procs=user_multi_procs,
    )
    self.patcher.auto()

    if not options:
        options = ChromeOptions()

    # prevent reuse of options,
    # as it just appends arguments, not replace them
    # you'll get conflicts starting chrome
    if getattr(options, "_session", None) is not None:
        raise RuntimeError("you cannot reuse the ChromeOptions object")

    options._session = self

    if not options.debugger_address:
        debug_port = (
            port
            if port != 0
            else selenium.webdriver.common.service.utils.free_port()
        )
        debug_host = "127.0.0.1"
        options.debugger_address = "%s:%d" % (debug_host, debug_port)
    else:
        debug_host, debug_port = options.debugger_address.split(":")
        debug_port = int(debug_port)

    if enable_cdp_events:
        options.set_capability(
            "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
        )

    options.add_argument("--remote-debugging-host=%s" % debug_host)
    options.add_argument("--remote-debugging-port=%s" % debug_port)

    if user_data_dir:
        options.add_argument("--user-data-dir=%s" % user_data_dir)

    language, keep_user_data_dir = None, bool(user_data_dir)

    # see if a custom user profile is specified in options.
    # Iterate over a snapshot: headless flags are removed from
    # ``options.arguments`` inside the loop, and removing from the list that
    # is being iterated makes the loop skip the argument that follows.
    for arg in list(options.arguments):

        if "headless" in arg:
            options.arguments.remove(arg)
            options.headless = True

        if "lang" in arg:
            match = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
            if match:
                language = match[1]

        if "user-data-dir" in arg:
            match = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
            if match:
                user_data_dir = match[1]
                logger.debug(
                    "user-data-dir found in user argument %s => %s" % (arg, match[1])
                )
                keep_user_data_dir = True

    if not user_data_dir:
        # backward compatibility
        # check if an old uc.ChromeOptions is used, and extract the user data dir
        legacy_user_data_dir = getattr(options, "user_data_dir", None)

        if legacy_user_data_dir:
            import warnings

            warnings.warn(
                "using ChromeOptions.user_data_dir might stop working in future versions. "
                "use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder"
            )
            options.add_argument("--user-data-dir=%s" % legacy_user_data_dir)
            # Record the directory actually in use so that the attribute set
            # below, the prefs handling and the log line refer to it.
            user_data_dir = legacy_user_data_dir
            keep_user_data_dir = True
            logger.debug(
                "user_data_dir property found in options object: %s" % user_data_dir
            )

        else:
            user_data_dir = os.path.normpath(tempfile.mkdtemp())
            keep_user_data_dir = False
            arg = "--user-data-dir=%s" % user_data_dir
            options.add_argument(arg)
            logger.debug(
                "created a temporary folder in which the user-data (profile) will be stored during this\n"
                "session, and added it to chrome startup arguments: %s" % arg
            )

    if not language:
        try:
            import locale

            language = locale.getdefaultlocale()[0].replace("_", "-")
        except Exception:
            # best effort only; fall through to the hard-coded default
            pass
        if not language:
            language = "en-US"

    options.add_argument("--lang=%s" % language)

    if not options.binary_location:
        options.binary_location = (
            browser_executable_path or find_chrome_executable()
        )

    if not options.binary_location or not \
            pathlib.Path(options.binary_location).exists():
        raise FileNotFoundError(
            "\n---------------------\n"
            "Could not determine browser executable."
            "\n---------------------\n"
            "Make sure your browser is installed in the default location (path).\n"
            "If you are sure about the browser executable, you can specify it using\n"
            "the `browser_executable_path='{}'` parameter.\n\n"
            .format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
        )

    self._delay = 3

    self.user_data_dir = user_data_dir
    self.keep_user_data_dir = keep_user_data_dir

    if suppress_welcome:
        options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
    if no_sandbox:
        options.arguments.extend(["--no-sandbox", "--test-type"])

    if headless or options.headless:
        # workaround until a better checking is found
        try:
            if self.patcher.version_main < 108:
                options.add_argument("--headless=chrome")
            elif self.patcher.version_main >= 108:
                options.add_argument("--headless=new")
        except Exception:
            # e.g. version_main could not be determined and is not comparable
            logger.warning("could not detect version_main. "
                           "therefore, we are assuming it is chrome 108 or higher")
            options.add_argument("--headless=new")

    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    # fixes "could not connect to chrome" error when running
    # on linux using privileged user like root (which i don't recommend)

    # The fallback must be chosen BEFORE formatting: "%" binds tighter than
    # "or", so without the parentheses the formatted string (always truthy)
    # would win and the python log level would never be consulted.
    options.add_argument(
        "--log-level=%d"
        % (log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0])
    )

    if hasattr(options, "handle_prefs"):
        options.handle_prefs(user_data_dir)

    # fix exit_type flag to prevent tab-restore nag
    try:
        with open(
            os.path.join(user_data_dir, "Default/Preferences"),
            encoding="latin1",
            mode="r+",
        ) as fs:
            config = json.load(fs)
            if config["profile"]["exit_type"] is not None:
                # fixing the restore-tabs-nag
                config["profile"]["exit_type"] = None
            fs.seek(0, 0)
            json.dump(config, fs)
            fs.truncate()  # the file might be shorter
            logger.debug("fixed exit_type flag")
    except Exception:
        # missing profile / preferences file or unexpected layout: nothing to fix
        logger.debug("did not find a bad exit_type flag ")

    self.options = options

    # Kept from the original flow; the result is not used further below.
    if not desired_capabilities:
        desired_capabilities = options.to_capabilities()

    if not use_subprocess:
        self.browser_pid = start_detached(
            options.binary_location, *options.arguments
        )
    else:
        # NOTE(review): the pipes are never read and only the pid is kept.
        browser = subprocess.Popen(
            [options.binary_location, *options.arguments],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            close_fds=IS_POSIX,
        )
        self.browser_pid = browser.pid

    service = selenium.webdriver.chromium.service.ChromiumService(
        self.patcher.executable_path,
        start_error_message="null"
    )

    super(Chrome, self).__init__(
        service=service,
        options=options,
        keep_alive=keep_alive,
    )

    self.reactor = None

    if enable_cdp_events:
        if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
            logging.getLogger(
                "selenium.webdriver.remote.remote_connection"
            ).setLevel(20)
        reactor = Reactor(self)
        reactor.start()
        self.reactor = reactor

    if advanced_elements:
        self._web_element_cls = UCWebElement
    else:
        self._web_element_cls = WebElement

    if options.headless:
        self._configure_headless()
488 |
def _configure_headless(self):
    """Wrap ``self.get`` so headless give-aways are patched before navigating.

    The wrapper checks ``navigator.webdriver`` in the current document; when
    it is truthy it registers scripts for every new document (hiding
    ``navigator.webdriver``, adding ``window.chrome`` and permission shims)
    and overrides the user agent with the "Headless" marker removed. It then
    delegates to the original ``get``.
    """
    orig_get = self.get
    logger.info("setting properties for headless")

    def get_wrapped(*args, **kwargs):
        if self.execute_script("return navigator.webdriver"):
            logger.info("patch navigator.webdriver")
            # The script must be valid JavaScript as a whole: a duplicated
            # "Object.defineProperty(window, ..., {" opener used to turn it
            # into a syntax error, so the proxy was never installed.
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """

                        Object.defineProperty(window, "navigator", {
                          value: new Proxy(navigator, {
                            has: (target, key) => (key === "webdriver" ? false : key in target),
                            get: (target, key) =>
                              key === "webdriver"
                                ? false
                                : typeof target[key] === "function"
                                ? target[key].bind(target)
                                : target[key],
                          }),
                        });
                    """
                },
            )

            logger.info("patch user-agent string")
            self.execute_cdp_cmd(
                "Network.setUserAgentOverride",
                {
                    "userAgent": self.execute_script(
                        "return navigator.userAgent"
                    ).replace("Headless", "")
                },
            )
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
                        Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});

                        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
                        window.chrome = {
                            app: {
                                isInstalled: false,
                                InstallState: {
                                    DISABLED: 'disabled',
                                    INSTALLED: 'installed',
                                    NOT_INSTALLED: 'not_installed'
                                },
                                RunningState: {
                                    CANNOT_RUN: 'cannot_run',
                                    READY_TO_RUN: 'ready_to_run',
                                    RUNNING: 'running'
                                }
                            },
                            runtime: {
                                OnInstalledReason: {
                                    CHROME_UPDATE: 'chrome_update',
                                    INSTALL: 'install',
                                    SHARED_MODULE_UPDATE: 'shared_module_update',
                                    UPDATE: 'update'
                                },
                                OnRestartRequiredReason: {
                                    APP_UPDATE: 'app_update',
                                    OS_UPDATE: 'os_update',
                                    PERIODIC: 'periodic'
                                },
                                PlatformArch: {
                                    ARM: 'arm',
                                    ARM64: 'arm64',
                                    MIPS: 'mips',
                                    MIPS64: 'mips64',
                                    X86_32: 'x86-32',
                                    X86_64: 'x86-64'
                                },
                                PlatformNaclArch: {
                                    ARM: 'arm',
                                    MIPS: 'mips',
                                    MIPS64: 'mips64',
                                    X86_32: 'x86-32',
                                    X86_64: 'x86-64'
                                },
                                PlatformOs: {
                                    ANDROID: 'android',
                                    CROS: 'cros',
                                    LINUX: 'linux',
                                    MAC: 'mac',
                                    OPENBSD: 'openbsd',
                                    WIN: 'win'
                                },
                                RequestUpdateCheckStatus: {
                                    NO_UPDATE: 'no_update',
                                    THROTTLED: 'throttled',
                                    UPDATE_AVAILABLE: 'update_available'
                                }
                            }
                        }

                        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
                        if (!window.Notification) {
                            window.Notification = {
                                permission: 'denied'
                            }
                        }

                        const originalQuery = window.navigator.permissions.query
                        window.navigator.permissions.__proto__.query = parameters =>
                            parameters.name === 'notifications'
                                ? Promise.resolve({ state: window.Notification.permission })
                                : originalQuery(parameters)

                        const oldCall = Function.prototype.call
                        function call() {
                            return oldCall.apply(this, arguments)
                        }
                        Function.prototype.call = call

                        const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
                        const oldToString = Function.prototype.toString

                        function functionToString() {
                            if (this === window.navigator.permissions.query) {
                                return 'function query() { [native code] }'
                            }
                            if (this === functionToString) {
                                return nativeToStringFunctionString
                            }
                            return oldCall.call(oldToString, this)
                        }
                        // eslint-disable-next-line
                        Function.prototype.toString = functionToString
                    """
                },
            )
        return orig_get(*args, **kwargs)

    # Shadow the bound method on the instance with the patching wrapper.
    self.get = get_wrapped
630 |
631 | # def _get_cdc_props(self):
632 | # return self.execute_script(
633 | # """
634 | # let objectToInspect = window,
635 | # result = [];
636 | # while(objectToInspect !== null)
637 | # { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
638 | # objectToInspect = Object.getPrototypeOf(objectToInspect); }
639 | #
640 | # return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
641 | # """
642 | # )
643 | #
644 | # def _hook_remove_cdc_props(self):
645 | # self.execute_cdp_cmd(
646 | # "Page.addScriptToEvaluateOnNewDocument",
647 | # {
648 | # "source": """
649 | # let objectToInspect = window,
650 | # result = [];
651 | # while(objectToInspect !== null)
652 | # { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
653 | # objectToInspect = Object.getPrototypeOf(objectToInspect); }
654 | # result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
655 | # &&delete window[p]&&console.log('removed',p))
656 | # """
657 | # },
658 | # )
659 |
def get(self, url):
    """Navigate to ``url`` by delegating to the parent WebDriver's ``get``."""
    # The cdc-property removal hook that used to run here is disabled.
    parent = super(Chrome, self)
    return parent.get(url)
664 |
def add_cdp_listener(self, event_name, callback):
    """Register ``callback`` for the CDP event ``event_name`` on the reactor.

    Returns the reactor's ``handlers`` after registration, or ``False`` when
    no ``Reactor`` is attached to this driver.
    """
    reactor = self.reactor
    if not (reactor and isinstance(reactor, Reactor)):
        return False
    reactor.add_event_handler(event_name, callback)
    return reactor.handlers
674 |
def clear_cdp_listeners(self):
    """Remove every registered CDP event handler; no-op without a ``Reactor``."""
    reactor = self.reactor
    if not (reactor and isinstance(reactor, Reactor)):
        return
    reactor.handlers.clear()
678 |
def window_new(self):
    """Issue the WebDriver ``NEW_WINDOW`` command with type ``"window"``."""
    new_window = selenium.webdriver.remote.command.Command.NEW_WINDOW
    self.execute(new_window, {"type": "window"})
683 |
def tab_new(self, url: str):
    """
    this opens a url in a new tab.
    apparently, that passes all tests directly!

    Parameters
    ----------
    url
        address to open in the new tab

    Returns
    -------
    None
    """
    # Reuse a ``cdp`` helper already attached to the driver; otherwise build
    # one from the driver's options. Previously the helper was only bound
    # when the attribute was missing, so an existing ``self.cdp`` was never
    # used to open the tab.
    cdp = getattr(self, "cdp", None)
    if cdp is None:
        cdp = CDP(self.options)
    cdp.tab_new(url)
702 |
def reconnect(self, timeout=0.1):
    """Stop the driver service, wait ``timeout`` seconds, restart it and open a new session.

    Each step is best effort: an exception is logged at debug level and the
    remaining steps still run.
    """
    def _attempt(step):
        # Run one step, logging instead of propagating any failure.
        try:
            step()
        except Exception as exc:
            logger.debug(exc)

    _attempt(lambda: self.service.stop())
    time.sleep(timeout)
    _attempt(lambda: self.service.start())
    _attempt(lambda: self.start_session())
718 |
def start_session(self, capabilities=None, browser_profile=None):
    """Start a session, deriving capabilities from ``self.options`` when none are given.

    ``browser_profile`` is accepted for signature compatibility and not used.
    """
    effective_capabilities = capabilities or self.options.to_capabilities()
    # Resolve ``start_session`` past selenium's Chrome WebDriver in the MRO.
    selenium_chrome = selenium.webdriver.chrome.webdriver.WebDriver
    super(selenium_chrome, self).start_session(effective_capabilities)
726 |
def find_elements_recursive(self, by, value):
    """
    Yield matching elements from the main document and from each iframe.

    This is a generator on purpose: a returned list of elements would be
    stale on arrival, whereas each yielded element is handed out while the
    driver is still switched to the frame it belongs to.

    Args:
        by: By
        value: str
    Returns: Generator[webelement.WebElement]
    """

    def search_frame(f=None):
        if f:
            self.switch_to.frame(f)
        else:
            # ensure we are on main content frame
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # switch back to main content, otherwise we will get StaleElementReferenceException
        self.switch_to.default_content()

    # root document first
    yield from search_frame()

    # then every iframe found in the main document
    for frame in self.find_elements('css selector', 'iframe'):
        yield from search_frame(frame)
762 |
def quit(self):
    """Shut down the driver service, reactor and browser, then clean up.

    Every shutdown step is best effort. When the profile directory is not
    marked to be kept, it is removed with up to five attempts. Finally the
    patcher reference is dropped.
    """
    try:
        self.service.process.kill()
        logger.debug("webdriver process ended")
    except (AttributeError, RuntimeError, OSError):
        pass
    try:
        self.reactor.event.set()
        logger.debug("shutting down reactor")
    except AttributeError:
        # no reactor attached (CDP events were not enabled)
        pass
    try:
        # 15 == SIGTERM
        os.kill(self.browser_pid, 15)
        logger.debug("gracefully closed browser")
    except Exception:  # noqa
        pass
    if (
        hasattr(self, "keep_user_data_dir")
        and hasattr(self, "user_data_dir")
        and not self.keep_user_data_dir
    ):
        for _ in range(5):
            try:
                shutil.rmtree(self.user_data_dir, ignore_errors=False)
            except FileNotFoundError:
                # Either the profile directory is already gone (done), or a
                # file vanished mid-walk while the browser was shutting down
                # (worth another attempt).
                if not os.path.exists(self.user_data_dir):
                    break
            except (RuntimeError, OSError) as e:
                logger.debug(
                    "When removing the temp profile, a %s occured: %s\nretrying..."
                    % (e.__class__.__name__, e)
                )
            else:
                logger.debug("successfully removed %s" % self.user_data_dir)
                break
            time.sleep(0.1)

    # dereference patcher, so patcher can start cleaning up as well.
    # this must come last, otherwise it will throw 'in use' errors
    self.patcher = None
802 |
def __getattribute__(self, item):
    """
    Attribute access hook used for call tracing.

    When the instance's ``debug`` attribute is falsy this is a plain
    attribute lookup. Otherwise bound methods are returned wrapped in a
    function that logs the call (qualified name, args, kwargs) at debug
    level before delegating to the real method.
    """
    lookup = super().__getattribute__
    if not lookup("debug"):
        return lookup(item)

    import inspect

    original = lookup(item)
    if not inspect.ismethod(original) or inspect.isclass(original):
        # only bound methods are traced, everything else is returned as-is
        return original

    def newfunc(*args, **kwargs):
        logger.debug(
            "calling %s with args %s and kwargs %s\n"
            % (original.__qualname__, args, kwargs)
        )
        return original(*args, **kwargs)

    return newfunc
820 |
def __enter__(self):
    """Enter the ``with`` block; the driver itself is the context value."""
    return self
823 |
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Leave the ``with`` block by restarting the driver service.

    The service is stopped, restarted after ``self._delay`` seconds and a
    new session is started. The exception arguments are not inspected and
    nothing is returned, so an exception raised inside the block propagates.
    """
    self.service.stop()
    # wait before bringing the service back up
    time.sleep(self._delay)
    self.service.start()
    self.start_session()
829 |
def __hash__(self):
    """Hash the driver by the debugger address stored in its options."""
    return hash(self.options.debugger_address)
832 |
def __dir__(self):
    """Return the attribute listing produced by ``object.__dir__``."""
    return object.__dir__(self)
835 |
def __del__(self):
    """
    Finalizer: kill the driver service process, then run ``quit()``.

    Any error raised while killing the process is swallowed so that
    ``quit()`` is always attempted.
    """
    try:
        self.service.process.kill()
    except:  # noqa
        pass
    self.quit()
842 |
@classmethod
def _ensure_close(cls, self):
    """
    Kill the service process of the given driver instance, if it has one.

    :param self: the driver instance to clean up (passed explicitly)
    """
    # needs to be a classmethod so finalize can find the reference
    logger.info("ensuring close")
    if not hasattr(self, "service"):
        return
    if not hasattr(self.service, "process"):
        return
    if not hasattr(self.service.process, "kill"):
        return
    self.service.process.kill()
853 |
854 |
def find_chrome_executable():
    """
    Finds the chrome, chrome beta, chrome canary, chromium executable

    Candidates are checked in a deterministic order: on POSIX every PATH
    entry in order (followed by the macOS application bundles), on other
    platforms the known Windows install roots taken from the environment.

    Returns
    -------
    executable_path : str
        the full (normalized) file path to the first candidate which exists
        and is executable, or None when no candidate matches

    """
    # a dict is used as an ordered set: duplicates collapse, order is kept
    candidates = {}
    if IS_POSIX:
        # PATH may be unset; treat that the same as an empty PATH
        for item in os.environ.get("PATH", "").split(os.pathsep):
            if not item:
                continue
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates[os.sep.join((item, subitem))] = None
        if "darwin" in sys.platform:
            for bundle in (
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                "/Applications/Chromium.app/Contents/MacOS/Chromium",
            ):
                candidates[bundle] = None
    else:
        for item in map(
            os.environ.get,
            ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                ):
                    candidates[os.sep.join((item, subitem, "chrome.exe"))] = None
    for candidate in candidates:
        logger.debug('checking if %s exists and is executable' % candidate)
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            logger.debug('found! using %s' % candidate)
            return os.path.normpath(candidate)
    return None
898 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/cdp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # this module is part of undetected_chromedriver
3 |
4 | import json
5 | import logging
6 |
7 | import requests
8 | import websockets
9 |
10 |
11 | log = logging.getLogger(__name__)
12 |
13 |
class CDPObject(dict):
    """
    A dict whose keys can also be read as attributes.

    Nested dicts, and dicts found directly inside list values, are
    converted to CDPObject as well. List values are converted in place.
    """

    def __init__(self, *a, **k):
        super().__init__(*a, **k)
        # make the instance namespace and the mapping one and the same
        self.__dict__ = self
        # iterate over a snapshot of the keys and use item access only, so
        # data keys which shadow dict methods (e.g. "items") cannot interfere
        for key in list(self):
            value = self[key]
            if isinstance(value, dict):
                self[key] = CDPObject(value)
            elif isinstance(value, list):
                for index, element in enumerate(value):
                    if isinstance(element, dict):
                        # wrap the element itself (not the parent object)
                        value[index] = CDPObject(element)

    def __repr__(self):
        tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
        return tpl.format(
            "\n ".join(f"{k} = {v}" for k, v in dict.items(self))
        )
29 |
30 |
class PageElement(CDPObject):
    """CDPObject subclass without additions; CDP.tab_list() returns its entries as this type."""

    pass
33 |
34 |
class CDP:
    """
    Small client for the HTTP/websocket debugging endpoints of a running
    browser, addressed through ``options.debugger_address``.
    """

    log = logging.getLogger("CDP")

    # endpoint templates, relative to the debugger address
    endpoints = CDPObject(
        {
            "json": "/json",
            "protocol": "/json/protocol",
            "list": "/json/list",
            "new": "/json/new?{url}",
            "activate": "/json/activate/{id}",
            "close": "/json/close/{id}",
        }
    )

    def __init__(self, options: "ChromeOptions"):  # noqa
        """
        :param options: object whose ``debugger_address`` is "host:port"
        """
        self.server_addr = "http://{0}:{1}".format(*options.debugger_address.split(":"))

        self._reqid = 0
        self._session = requests.Session()
        self._last_resp = None
        self._last_json = None

        # the first listed target provides the session id and websocket url
        resp = self.get(self.endpoints.json)  # noqa
        self.sessionId = resp[0]["id"]
        self.wsurl = resp[0]["webSocketDebuggerUrl"]

    def tab_activate(self, id=None):
        """
        Activate the tab with the given id.
        Without an id, the first tab of tab_list() is activated and its
        websocket url becomes the one used by send().
        """
        if not id:
            active_tab = self.tab_list()[0]
            id = active_tab.id  # noqa
            self.wsurl = active_tab.webSocketDebuggerUrl  # noqa
        return self.post(self.endpoints["activate"].format(id=id))

    def tab_list(self):
        """Return the entries of the list endpoint as PageElement objects."""
        retval = self.get(self.endpoints["list"])
        return [PageElement(o) for o in retval]

    def tab_new(self, url):
        """Request a new tab for the given url."""
        return self.post(self.endpoints["new"].format(url=url))

    def tab_close_last_opened(self):
        """Close the last listed target of type "page"."""
        sessions = self.tab_list()
        opentabs = [s for s in sessions if s["type"] == "page"]
        return self.post(self.endpoints["close"].format(id=opentabs[-1]["id"]))

    async def send(self, method: str, params: dict):
        """
        Send one command over the websocket and store the reply in
        ``last_json``. Every call opens its own websocket connection.
        """
        self._reqid += 1
        async with websockets.connect(self.wsurl) as ws:
            await ws.send(
                json.dumps({"method": method, "params": params, "id": self._reqid})
            )
            self._last_resp = await ws.recv()
            self._last_json = json.loads(self._last_resp)
            self.log.info(self._last_json)

    def get(self, uri):
        """
        GET ``uri`` relative to the debugger address.

        :return: the decoded JSON body, or None when the body is not JSON
        """
        resp = self._session.get(self.server_addr + uri)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return
        else:
            return self._last_json

    def post(self, uri, data: dict = None):
        """
        POST ``data`` as JSON to ``uri`` relative to the debugger address.

        :return: the decoded JSON body (as get() does), or the raw response
                 object when the body is not JSON
        """
        if not data:
            data = {}
        resp = self._session.post(self.server_addr + uri, json=data)
        try:
            self._last_resp = resp
            self._last_json = resp.json()
        except Exception:
            return self._last_resp
        else:
            # previously a successful parse fell through and returned None
            return self._last_json

    @property
    def last_json(self):
        """The most recently decoded JSON payload."""
        return self._last_json
113 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/devtool.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from collections.abc import Mapping
3 | from collections.abc import Sequence
4 | from functools import wraps
5 | import logging
6 | import threading
7 | import time
8 | import traceback
9 | from typing import Any
10 | from typing import Awaitable
11 | from typing import Callable
12 | from typing import List
13 | from typing import Optional
14 |
15 |
class Structure(dict):
    """
    This is a dict-like object structure, which you should subclass.

    Keys are available as attributes as well. Nested mappings are
    converted to the same class, also when they are elements of a
    (non-string) sequence; such sequences are stored as lists.
    """

    # class level dict; every subclass gets its own (see __init_subclass__)
    _store = {}

    def __init__(self, *a, **kw):
        """
        Instantiate a new instance.

        :param a: positional arguments as accepted by dict()
        :param kw: keyword arguments as accepted by dict()
        """

        super().__init__()

        # auxiliar dict
        d = dict(*a, **kw)
        for k, v in d.items():
            if isinstance(v, Mapping):
                self[k] = self.__class__(v)
            elif isinstance(v, Sequence) and not isinstance(v, (str, bytes)):
                # only mappings are wrapped; scalars inside a sequence are
                # kept as they are (wrapping them raised an error before)
                self[k] = [
                    self.__class__(i) if isinstance(i, Mapping) else i for i in v
                ]
            else:
                self[k] = v
        # the mapping doubles as the instance namespace
        super().__setattr__("__dict__", self)

    def __getattr__(self, item):
        return getattr(super(), item)

    def __getitem__(self, item):
        return super().__getitem__(item)

    def __setattr__(self, key, value):
        self.__setitem__(key, value)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)

    def update(self, *a, **kw):
        super().update(*a, **kw)

    def __eq__(self, other):
        try:
            other_items = other.items()
        except AttributeError:
            # not a mapping-like object: let python decide (unequal)
            return NotImplemented
        return frozenset(other_items) == frozenset(self.items())

    def __hash__(self):
        return hash(frozenset(self.items()))

    @classmethod
    def __init_subclass__(cls, **kwargs):
        cls._store = {}

    def _normalize_strings(self):
        """Strip surrounding whitespace from every top level string value."""
        for k, v in self.copy().items():
            if isinstance(v, (str)):
                self[k] = v.strip()
76 |
77 |
def timeout(seconds=3, on_timeout: Optional[Callable[[callable], Any]] = None):
    """
    Decorator factory which arms a ``threading.Timer`` around each call.

    When the timer fires before the call has finished, ``on_timeout`` is
    invoked with the decorated function, or, without a callback, a
    TimeoutError is raised. Both happen on the timer thread; the running
    call itself is not interrupted. The timer is cancelled as soon as the
    call returns or raises.

    :param seconds: timer interval
    :param on_timeout: optional callback receiving the decorated function
    """

    def decorate(func):
        @wraps(func)
        def guarded(*args, **kwargs):
            def on_expired():
                if not on_timeout:
                    raise TimeoutError("function call timed out")
                on_timeout(func)

            timer = threading.Timer(interval=seconds, function=on_expired)
            timer.start()
            try:
                return func(*args, **kwargs)
            finally:
                # runs for the normal and the exceptional exit alike
                timer.cancel()

        return guarded

    return decorate
101 |
102 |
def test():
    """
    Manual smoke test: starts a browser, polls its logs from a background
    thread for about 10 seconds while a page is loaded, then shuts down.

    Requires a local browser and network access.
    """
    import sys, os

    sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
    import undetected_chromedriver as uc
    import threading

    def collector(
        driver: uc.Chrome,
        stop_event: threading.Event,
        on_event_coro: Optional[Callable[[List[str]], Awaitable[Any]]] = None,
        listen_events: Sequence = ("browser", "network", "performance"),
    ):
        """Poll the given log types in a thread until stop_event is set."""

        def threaded(driver, stop_event, on_event_coro):
            async def _ensure_service_started():
                while (
                    getattr(driver, "service", False)
                    and getattr(driver.service, "process", False)
                    and driver.service.process.poll()
                ):
                    print("waiting for driver service to come back on")
                    await asyncio.sleep(0.05)
                    # await asyncio.sleep(driver._delay or .25)

            async def get_log_lines(typ):
                await _ensure_service_started()
                return driver.get_log(typ)

            async def looper():
                while not stop_event.is_set():
                    log_lines = []
                    try:
                        for _ in listen_events:
                            try:
                                log_lines += await get_log_lines(_)
                            except:
                                # 10 == logging.DEBUG
                                if logging.getLogger().getEffectiveLevel() <= 10:
                                    traceback.print_exc()
                                continue
                        if log_lines and on_event_coro:
                            await on_event_coro(log_lines)
                    except Exception as e:
                        if logging.getLogger().getEffectiveLevel() <= 10:
                            traceback.print_exc()

            # the thread runs its own event loop
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            loop.run_until_complete(looper())

        t = threading.Thread(target=threaded, args=(driver, stop_event, on_event_coro))
        t.start()

    async def on_event(data):
        print("on_event")
        print("data:", data)

    def func_called(fn):
        """Wrap fn so that every call and its result are printed."""

        def wrapped(*args, **kwargs):
            print(
                "func called! %s (args: %s, kwargs: %s)" % (fn.__name__, args, kwargs)
            )
            # `driver` is resolved at call time, after it has been created
            while driver.service.process and driver.service.process.poll() is not None:
                time.sleep(0.1)
            res = fn(*args, **kwargs)
            print("func completed! (result: %s)" % res)
            return res

        return wrapped

    logging.basicConfig(level=10)

    options = uc.ChromeOptions()
    options.set_capability(
        "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL", "network": "ALL"}
    )

    driver = uc.Chrome(version_main=96, options=options)

    # driver.command_executor._request = timeout(seconds=1)(driver.command_executor._request)
    driver.command_executor._request = func_called(driver.command_executor._request)
    collector_stop = threading.Event()
    collector(driver, collector_stop, on_event)

    try:
        driver.get("https://nowsecure.nl")

        time.sleep(10)
    finally:
        # the collector thread is not a daemon: without setting the stop
        # event it would poll forever and keep the process alive
        collector_stop.set()
        driver.quit()
191 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/dprocess.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import logging
3 | import multiprocessing
4 | import os
5 | import platform
6 | import signal
7 | from subprocess import PIPE
8 | from subprocess import Popen
9 | import sys
10 |
11 |
12 | CREATE_NEW_PROCESS_GROUP = 0x00000200
13 | DETACHED_PROCESS = 0x00000008
14 |
15 | REGISTERED = []
16 |
17 |
def start_detached(executable, *args):
    """
    Starts a fully independent subprocess (with no parent)

    The executable is launched from a short-lived helper process, which
    reports the pid of the launched process back through a pipe. The pid is
    also recorded in REGISTERED.

    :param executable: executable
    :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...]
    :return: pid of the grandchild process
    """

    # one-way pipe: the helper writes, we read
    pid_source, pid_sink = multiprocessing.Pipe(False)

    # the helper process object is deliberately not kept
    multiprocessing.Process(
        target=_start_detached,
        args=(executable, *args),
        kwargs={"writer": pid_sink},
        daemon=True,
    ).start()

    # blocks until the helper has reported the pid
    grandchild_pid = pid_source.recv()
    REGISTERED.append(grandchild_pid)

    # both pipe ends are done now
    pid_sink.close()
    pid_source.close()

    return grandchild_pid
44 |
45 |
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
    """
    Helper process body: launch the executable, send its pid through
    ``writer`` and exit the helper process.

    :param executable: executable
    :param args: arguments to the executable
    :param writer: connection the pid of the launched process is sent to
    """
    # pick the platform specific way of launching
    if platform.system() == "Windows":
        launch_options = {
            "creationflags": DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP
        }
    elif sys.version_info < (3, 2):
        # assume posix
        launch_options = {"preexec_fn": os.setsid}
    else:  # Python 3.2+ and Unix
        launch_options = {"start_new_session": True}

    command = [executable, *args]
    process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE, **launch_options)

    # report the pid, then end this helper process
    writer.send(process.pid)
    sys.exit()
63 |
64 |
def _cleanup():
    """Send SIGTERM to every pid in REGISTERED, ignoring any failure."""
    for registered_pid in REGISTERED:
        try:
            module_logger = logging.getLogger(__name__)
            module_logger.debug("cleaning up pid %d " % registered_pid)
            os.kill(registered_pid, signal.SIGTERM)
        except:  # noqa
            # best-effort: the process may be gone already
            pass
72 |
73 |
74 | atexit.register(_cleanup)
75 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/options.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # this module is part of undetected_chromedriver
3 |
4 |
5 | import json
6 | import os
7 |
8 | from selenium.webdriver.chromium.options import ChromiumOptions as _ChromiumOptions
9 |
10 |
class ChromeOptions(_ChromiumOptions):
    """
    Chromium options with a ``user_data_dir`` property and support for
    writing the "prefs" experimental option into the profile on disk.
    """

    _session = None
    _user_data_dir = None

    @property
    def user_data_dir(self):
        """The normalized, absolute profile folder, or None when not set."""
        return self._user_data_dir

    @user_data_dir.setter
    def user_data_dir(self, path: str):
        """
        Sets the browser profile folder to use, or creates a new profile
        at given <path>.

        Parameters
        ----------
        path: str
            the path to a chrome profile folder
            if it does not exist, a new profile will be created at given location
        """
        self._user_data_dir = os.path.normpath(os.path.abspath(path))

    @staticmethod
    def _undot_key(key, value):
        """turn a (dotted key, value) into a proper nested dict"""
        # wrap from the innermost key segment outwards
        nested = value
        for segment in reversed(key.split(".")):
            nested = {segment: nested}
        return nested

    @staticmethod
    def _merge_nested(a, b):
        """
        merges b into a
        leaf values in a are overwritten with values from b
        """
        for key, incoming in b.items():
            mergeable = (
                key in a
                and isinstance(a[key], dict)
                and isinstance(incoming, dict)
            )
            if mergeable:
                ChromeOptions._merge_nested(a[key], incoming)
            else:
                a[key] = incoming
        return a

    def handle_prefs(self, user_data_dir):
        """
        Write the "prefs" experimental option into the ``Preferences`` file
        of the ``Default`` profile, merged over what the file already holds,
        and remove "prefs" from the experimental options afterwards.

        Does nothing when no prefs are set.

        :param user_data_dir: profile folder; falls back to the folder set
                              on this object when falsy
        """
        prefs = self.experimental_options.get("prefs")
        if not prefs:
            return

        profile_root = user_data_dir or self._user_data_dir
        default_path = os.path.join(profile_root, "Default")
        os.makedirs(default_path, exist_ok=True)

        # undot prefs dict keys
        merged = {}
        for dotted_key, pref_value in prefs.items():
            merged = self._merge_nested(
                merged, self._undot_key(dotted_key, pref_value)
            )

        prefs_file = os.path.join(default_path, "Preferences")
        if os.path.exists(prefs_file):
            # values given in prefs win over what is stored on disk
            with open(prefs_file, encoding="latin1", mode="r") as stored:
                merged = self._merge_nested(json.load(stored), merged)

        with open(prefs_file, encoding="latin1", mode="w") as target:
            json.dump(merged, target)

        # remove the experimental_options to avoid an error
        del self._experimental_options["prefs"]

    @classmethod
    def from_options(cls, options):
        """Create an instance carrying a copy of the attributes of ``options``."""
        clone = cls()
        clone.__dict__.update(options.__dict__)
        return clone
86 |
--------------------------------------------------------------------------------
/src/undetected_chromedriver/patcher.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # this module is part of undetected_chromedriver
3 |
4 | from distutils.version import LooseVersion
5 | import io
6 | import json
7 | import logging
8 | import os
9 | import pathlib
10 | import platform
11 | import random
12 | import re
13 | import shutil
14 | import string
15 | import sys
16 | import time
17 | from urllib.request import urlopen
18 | from urllib.request import urlretrieve
19 | import zipfile
20 | from multiprocessing import Lock
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 | IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
25 |
26 |
class Patcher(object):
    """
    Obtains a chromedriver binary (download or user supplied) and patches
    it in place, replacing the injected ``{window.cdc...;}`` code block.
    """

    lock = Lock()
    # "%s" is filled with the platform specific suffix by _set_platform_name
    exe_name = "chromedriver%s"

    platform = sys.platform
    # folder in which downloaded binaries are stored
    if platform.endswith("win32"):
        d = "~/appdata/roaming/undetected_chromedriver"
    elif "LAMBDA_TASK_ROOT" in os.environ:
        d = "/tmp/undetected_chromedriver"
    elif platform.startswith(("linux", "linux2")):
        d = "~/.local/share/undetected_chromedriver"
    elif platform.endswith("darwin"):
        d = "~/Library/Application Support/undetected_chromedriver"
    else:
        d = "~/.undetected_chromedriver"
    data_path = os.path.abspath(os.path.expanduser(d))

    def __init__(
        self,
        executable_path=None,
        force=False,
        version_main: int = 0,
        user_multi_procs=False,
    ):
        """
        Args:
            executable_path: None = automatic
                             a full file path to the chromedriver executable
            force: False
                    terminate processes which are holding lock
            version_main: 0 = auto
                specify main chrome version (rounded, ex: 82)
            user_multi_procs: False
                when set, auto() reuses the most recent binary found in the
                data folder instead of downloading one

        Raises:
            ValueError: version_main can not be converted to an integer
        """
        self.force = force
        self._custom_exe_path = False
        prefix = "undetected"
        self.user_multi_procs = user_multi_procs

        try:
            version_main = int(version_main)
        except (ValueError, TypeError):
            raise ValueError("version_main must be an integer")

        # versions up to 114 are served from a different repository
        self.is_old_chromedriver = version_main and version_main <= 114
        # Needs to be called before self.exe_name is accessed
        self._set_platform_name()

        if not os.path.exists(self.data_path):
            os.makedirs(self.data_path, exist_ok=True)

        if not executable_path:
            self.executable_path = os.path.join(
                self.data_path, "_".join([prefix, self.exe_name])
            )

        if not IS_POSIX:
            if executable_path:
                if not executable_path[-4:] == ".exe":
                    executable_path += ".exe"

        self.zip_path = os.path.join(self.data_path, prefix)

        if not executable_path:
            if not self.user_multi_procs:
                self.executable_path = os.path.abspath(
                    os.path.join(".", self.executable_path)
                )

        if executable_path:
            self._custom_exe_path = True
            self.executable_path = executable_path

        # Set the correct repository to download the Chromedriver from
        if self.is_old_chromedriver:
            self.url_repo = "https://chromedriver.storage.googleapis.com"
        else:
            self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"

        self.version_main = version_main
        self.version_full = None

    def _set_platform_name(self):
        """
        Set the platform and exe name based on the platform undetected_chromedriver is running on
        in order to download the correct chromedriver.
        """
        if self.platform.endswith("win32"):
            self.platform_name = "win32"
            self.exe_name %= ".exe"
        if self.platform.endswith(("linux", "linux2")):
            self.platform_name = "linux64"
            self.exe_name %= ""
        if self.platform.endswith("darwin"):
            if self.is_old_chromedriver:
                self.platform_name = "mac64"
            else:
                self.platform_name = "mac-x64"
            self.exe_name %= ""

    def auto(self, executable_path=None, force=False, version_main=None, _=None):
        """
        Make sure a patched chromedriver binary is available.

        Args:
            executable_path: use (and patch if needed) this binary instead
                             of downloading one
            force: kill processes holding the binary when it can not be
                   replaced
            version_main: main chrome version to fetch the driver for

        Returns:
            True when a patched binary is available (the result of the
            patch check after a download), or None/the result of
            patch_exe() for a user supplied binary
        """
        p = pathlib.Path(self.data_path)
        if self.user_multi_procs:
            # NOTE(review): this lock is created on the spot and therefore
            # not shared with anyone; the class level `lock` looks like the
            # intended one - confirm nothing holds it while calling auto()
            with Lock():
                files = list(p.rglob("*chromedriver*"))
                # an empty data folder means there is nothing to reuse yet:
                # continue with the regular download below
                if files:
                    most_recent = max(files, key=lambda f: f.stat().st_mtime)
                    files.remove(most_recent)
                    for outdated in files:
                        outdated.unlink()
                    if self.is_binary_patched(most_recent):
                        self.executable_path = str(most_recent)
                        return True

        if executable_path:
            self.executable_path = executable_path
            self._custom_exe_path = True

        if self._custom_exe_path:
            ispatched = self.is_binary_patched(self.executable_path)
            if not ispatched:
                return self.patch_exe()
            else:
                return

        if version_main:
            self.version_main = version_main
        if force is True:
            self.force = force

        try:
            os.unlink(self.executable_path)
        except PermissionError:
            if self.force:
                self.force_kill_instances(self.executable_path)
                return self.auto(force=not self.force)
            try:
                if self.is_binary_patched():
                    # assumes already running AND patched
                    return True
            except PermissionError:
                pass
            # return False
        except FileNotFoundError:
            pass

        release = self.fetch_release_number()
        self.version_main = release.version[0]
        self.version_full = release
        self.unzip_package(self.fetch_package())
        return self.patch()

    def driver_binary_in_use(self, path: str = None) -> bool:
        """
        naive test to check if a found chromedriver binary is
        currently in use

        Args:
            path: a string or PathLike object to the binary to check.
                  if not specified, we check use this object's executable_path

        Returns:
            True when seeking or reading raised a PermissionError, False
            when both worked. None when the file could not be opened or any
            other error occurred.

        Raises:
            OSError: the file does not exist
        """
        if not path:
            path = self.executable_path
        p = pathlib.Path(path)

        if not p.exists():
            raise OSError("file does not exist: %s" % p)
        try:
            with open(p, mode="a+b") as fs:
                exc = []
                try:

                    fs.seek(0, 0)
                except PermissionError as e:
                    exc.append(e)  # since some systems apprently allow seeking
                    # we conduct another test
                try:
                    fs.readline()
                except PermissionError as e:
                    exc.append(e)

                if exc:

                    return True
                return False
            # ok safe to assume this is in use
        except Exception as e:
            # logger.exception("whoops ", e)
            pass

    def cleanup_unused_files(self):
        """Remove every "*undetected*" entry in the data folder which can be removed."""
        p = pathlib.Path(self.data_path)
        items = list(p.glob("*undetected*"))
        for item in items:
            try:
                item.unlink()
            except:
                pass

    def patch(self):
        """Patch the binary and report whether the patch marker is present."""
        self.patch_exe()
        return self.is_binary_patched()

    def fetch_release_number(self):
        """
        Gets the latest major version available, or the latest release of
        self.version_main if set explicitly.
        :return: version string
        :rtype: LooseVersion
        """
        # Endpoint for old versions of Chromedriver (114 and below)
        if self.is_old_chromedriver:
            path = f"/latest_release_{self.version_main}"
            path = path.upper()
            logger.debug("getting release number from %s" % path)
            return LooseVersion(urlopen(self.url_repo + path).read().decode())

        # Endpoint for new versions of Chromedriver (115+)
        if not self.version_main:
            # Fetch the latest version
            path = "/last-known-good-versions-with-downloads.json"
            logger.debug("getting release number from %s" % path)
            with urlopen(self.url_repo + path) as conn:
                response = conn.read().decode()

            last_versions = json.loads(response)
            return LooseVersion(last_versions["channels"]["Stable"]["version"])

        # Fetch the latest minor version of the major version provided
        path = "/latest-versions-per-milestone-with-downloads.json"
        logger.debug("getting release number from %s" % path)
        with urlopen(self.url_repo + path) as conn:
            response = conn.read().decode()

        major_versions = json.loads(response)
        return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])

    def parse_exe_version(self):
        """
        Read the version from the binary.
        :return: LooseVersion, or None when no version marker is found
        """
        with io.open(self.executable_path, "rb") as f:
            for line in iter(lambda: f.readline(), b""):
                match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line)
                if match:
                    return LooseVersion(match[1].decode())

    def fetch_package(self):
        """
        Downloads ChromeDriver from source

        :return: path to downloaded file
        """
        zip_name = f"chromedriver_{self.platform_name}.zip"
        if self.is_old_chromedriver:
            download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
        else:
            # the new archives are named chromedriver-<platform>.zip
            zip_name = zip_name.replace("_", "-", 1)
            download_url = "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/%s/%s/%s"
            download_url %= (self.version_full.vstring, self.platform_name, zip_name)

        logger.debug("downloading from %s" % download_url)
        return urlretrieve(download_url)[0]

    def unzip_package(self, fp):
        """
        Extract the executable from the downloaded archive to
        self.executable_path and remove the archive and the extraction
        folder.

        :param fp: path to the downloaded archive
        :return: path to unpacked executable
        """
        exe_path = self.exe_name
        if not self.is_old_chromedriver:
            # The new chromedriver unzips into its own folder
            zip_name = f"chromedriver-{self.platform_name}"
            exe_path = os.path.join(zip_name, self.exe_name)

        logger.debug("unzipping %s" % fp)
        try:
            os.unlink(self.zip_path)
        except (FileNotFoundError, OSError):
            pass

        os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
        with zipfile.ZipFile(fp, mode="r") as zf:
            zf.extractall(self.zip_path)
        os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
        os.remove(fp)
        shutil.rmtree(self.zip_path)
        os.chmod(self.executable_path, 0o755)
        return self.executable_path

    @staticmethod
    def force_kill_instances(exe_name):
        """
        kills running instances.
        :param: executable name to kill, may be a path as well

        :return: True on success else False
        """
        exe_name = os.path.basename(exe_name)
        if IS_POSIX:
            # kill has no "-f" option; with it the command failed every time
            r = os.system("kill -9 $(pidof %s)" % exe_name)
        else:
            r = os.system("taskkill /f /im %s" % exe_name)
        return not r

    @staticmethod
    def gen_random_cdc():
        """Return 27 random ascii letters as bytes."""
        cdc = random.choices(string.ascii_letters, k=27)
        return "".join(cdc).encode()

    def is_binary_patched(self, executable_path=None):
        """
        Check whether the binary contains the patch marker.

        :param executable_path: binary to check, defaults to self.executable_path
        :return: True when the marker is found, False when it is not or the
                 file does not exist
        """
        executable_path = executable_path or self.executable_path
        try:
            with io.open(executable_path, "rb") as fh:
                return fh.read().find(b"undetected chromedriver") != -1
        except FileNotFoundError:
            return False

    def patch_exe(self):
        """
        Replace the injected code block in the binary by a marker statement
        of the same length, so the file size and offsets do not change.
        """
        start = time.perf_counter()
        logger.info("patching driver executable %s" % self.executable_path)
        with io.open(self.executable_path, "r+b") as fh:
            content = fh.read()
            # match_injected_codeblock = re.search(rb"{window.*;}", content)
            match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
            if match_injected_codeblock:
                target_bytes = match_injected_codeblock[0]
                # pad with spaces up to the length of the replaced block
                new_target_bytes = (
                    b'{console.log("undetected chromedriver 1337!")}'.ljust(
                        len(target_bytes), b" "
                    )
                )
                new_content = content.replace(target_bytes, new_target_bytes)
                if new_content == content:
                    logger.warning(
                        "something went wrong patching the driver binary. could not find injection code block"
                    )
                else:
                    logger.debug(
                        "found block:\n%s\nreplacing with:\n%s"
                        % (target_bytes, new_target_bytes)
                    )
                fh.seek(0)
                fh.write(new_content)
        logger.debug(
            "patching took us {:.2f} seconds".format(time.perf_counter() - start)
        )

    def __repr__(self):
        return "{0:s}({1:s})".format(
            self.__class__.__name__,
            self.executable_path,
        )

    def __del__(self):
        """
        Remove the managed binary, retrying for at most 3 seconds.

        User supplied binaries and binaries shared between processes
        (user_multi_procs) are never removed.
        """
        # a partially initialized object has nothing to clean up
        if getattr(self, "_custom_exe_path", True):
            # if the driver binary is specified by user
            # we assume it is important enough to not delete it
            return
        if self.user_multi_procs:
            return

        timeout = 3  # stop trying after this many seconds
        deadline = time.monotonic() + timeout
        # we don't want to wait until the end of time
        while time.monotonic() < deadline:
            try:
                os.unlink(self.executable_path)
                logger.debug("successfully unlinked %s" % self.executable_path)
                break
            except FileNotFoundError:
                # must be handled before OSError (its base class): a file
                # which is gone already does not need another attempt
                break
            except (OSError, RuntimeError, PermissionError):
                time.sleep(0.01)
                continue
--------------------------------------------------------------------------------
/src/undetected_chromedriver/reactor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # this module is part of undetected_chromedriver
3 |
4 | import asyncio
5 | import json
6 | import logging
7 | import threading
8 |
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
class Reactor(threading.Thread):
    """
    Daemon thread which polls the "performance" log of a driver and hands
    the contained messages to registered handlers.
    """

    def __init__(self, driver: "Chrome"):
        super().__init__()

        self.driver = driver
        # the loop is run by this thread only (see run())
        self.loop = asyncio.new_event_loop()

        self.lock = threading.Lock()
        # setting this event stops the listen loop
        self.event = threading.Event()
        self.daemon = True
        self.handlers = {}

    def add_event_handler(self, method_name, callback: callable):
        """

        Parameters
        ----------
        method_name: str
            example "Network.responseReceived" (stored lower cased).
            "*" receives every message.

        callback: callable
            callable which accepts 1 parameter: the message object dictionary

        Returns
        -------

        """
        with self.lock:
            self.handlers[method_name.lower()] = callback

    @property
    def running(self):
        """True as long as the stop event has not been set."""
        return not self.event.is_set()

    def run(self):
        try:
            asyncio.set_event_loop(self.loop)
            self.loop.run_until_complete(self.listen())
        except Exception as e:
            logger.warning("Reactor.run() => %s", e)

    async def _wait_service_started(self):
        """Wait while the poll() result of the driver service process is truthy."""
        while True:
            with self.lock:
                must_wait = bool(
                    getattr(self.driver, "service", None)
                    and getattr(self.driver.service, "process", None)
                    and self.driver.service.process.poll()
                )
            if not must_wait:
                break
            # sleep outside of the lock, so other threads (for example
            # add_event_handler callers) are not blocked meanwhile
            await asyncio.sleep(self.driver._delay or 0.25)

    async def listen(self):
        """Poll the performance log once per second until stopped."""
        while self.running:
            await self._wait_service_started()
            await asyncio.sleep(1)

            try:
                with self.lock:
                    log_entries = self.driver.get_log("performance")

                for entry in log_entries:
                    obj_serialized: str = entry.get("message")
                    obj = json.loads(obj_serialized)
                    message = obj.get("message")
                    method = message.get("method")

                    if "*" in self.handlers:
                        await self.loop.run_in_executor(
                            None, self.handlers["*"], message
                        )
                    elif isinstance(method, str) and method.lower() in self.handlers:
                        # messages without a method name are skipped
                        # instead of aborting the rest of the batch
                        await self.loop.run_in_executor(
                            None, self.handlers[method.lower()], message
                        )

                    # print(type(message), message)

            except Exception as e:
                if "invalid session id" in str(e):
                    pass
                else:
                    # the exception needs a placeholder to end up in the message
                    logger.debug("exception ignored : %s", e)
--------------------------------------------------------------------------------
/src/undetected_chromedriver/webelement.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from selenium.webdriver.common.by import By
4 | import selenium.webdriver.remote.webelement
5 |
6 |
class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """Selenium ``WebElement`` extended with a couple of convenience helpers."""

    def click_safe(self):
        """Click the element, then call ``reconnect(0.1)`` on the parent driver."""
        super().click()
        self._parent.reconnect(0.1)

    def children(
        self, tag=None, recursive=False
    ) -> List[selenium.webdriver.remote.webelement.WebElement]:
        """
        returns direct child elements of current element

        :param tag: str, if supplied, returns <tag> nodes only
        :param recursive: bool, if true, the result comes from
            ``_recursive_children`` (children of children included)
            instead of the direct children only
        """
        js_parts = ["return [... arguments[0].children]"]
        if tag:
            # The browser-side filter compares against the upper-cased tag.
            js_parts.append(".filter( node => node.tagName === '%s')" % tag.upper())
        if recursive:
            return list(_recursive_children(self, tag))
        script = "".join(js_parts)
        return list(self._parent.execute_script(script, self))
25 |
26 |
class UCWebElement(WebElement):
    """
    Custom WebElement class which makes it easier to view elements when
    working in an interactive environment.

    standard webelement repr:
    <selenium.webdriver.remote.webelement.WebElement (session="...", element="...")>

    using this WebElement class:
    UCWebElement <a class="..." href="#" id="...">

    """

    def __init__(self, parent, id_):
        super().__init__(parent, id_)
        # None means "not fetched yet". An element without attributes
        # legitimately caches an empty dict.
        self._attrs = None

    @property
    def attrs(self):
        """
        Mapping of attribute name -> value for this element.

        The mapping is fetched from the browser on first access and reused
        afterwards. The cache check is ``is None`` rather than truthiness so
        that an empty result is cached too, not re-queried on every access.
        """
        if self._attrs is None:
            # `var index` keeps the loop counter local to the injected
            # script instead of creating an implicit global in the page.
            self._attrs = self._parent.execute_script(
                """
                var items = {};
                for (var index = 0; index < arguments[0].attributes.length; ++index)
                {
                    items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
                };
                return items;
                """,
                self,
            )
        return self._attrs

    def __repr__(self):
        """Return ``ClassName <tag attr="value" ...>`` built from ``attrs``."""
        strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
        if strattrs:
            strattrs = " " + strattrs
        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
65 |
66 |
def _recursive_children(element, tag: str = None, _results=None):
    """
    returns all children of <element> recursively

    :param element: `WebElement` object.
                    find children below this <element>

    :param tag: str = None.
                if provided, return only <tag> elements. example: 'a', or 'img'.
                Matching ignores case, so 'A' and 'a' select the same
                elements (the non-recursive ``children()`` path upper-cases
                the tag, so both paths now accept either spelling).
    :param _results: do not use!
    :return: set of matching elements; elements that do not match ``tag``
             are still descended into.
    """
    # `is None` (not `or`) so an empty accumulator passed down by the
    # parent call is reused instead of being replaced by a new set.
    results = set() if _results is None else _results
    wanted = tag.lower() if tag else None
    for child in element.children():
        if wanted is None or child.tag_name.lower() == wanted:
            results.add(child)
        _recursive_children(child, tag, results)
    return results
87 |
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | import random
5 | import re
6 | import shutil
7 |
8 | from selenium.webdriver.chrome.webdriver import WebDriver
9 |
10 | import undetected_chromedriver as uc
11 | from dtos import V1RequestBase
12 |
# Module-level caches, filled lazily by the helpers below.
# None means "not computed yet"; each getter checks `is not None` before
# doing any work, so a non-None initial value would be returned as-is and
# the real lookup would never run.
FLARESOLVERR_VERSION = None  # set by get_flaresolverr_version()
CHROME_MAJOR_VERSION = None  # set by get_chrome_major_version()
USER_AGENT = None  # set by get_user_agent()
XVFB_DISPLAY = None  # set by start_xvfb_display()
PATCHED_DRIVER_PATH = None  # set by get_webdriver()
18 |
19 |
def get_config_log_html() -> bool:
    """Return True only when the LOG_HTML environment variable is 'true' (any casing); unset counts as 'false'."""
    raw_value = os.getenv('LOG_HTML', 'false')
    return raw_value.lower() == 'true'
22 |
23 |
def get_config_headless() -> bool:
    """Return True only when the HEADLESS environment variable is 'true' (any casing); unset counts as 'true'."""
    raw_value = os.getenv('HEADLESS', 'true')
    return raw_value.lower() == 'true'
26 |
27 |
def get_flaresolverr_version() -> str:
    """
    Return the cached FLARESOLVERR_VERSION value.

    When the cache is still None, the 'version' field is read from the
    package.json located one directory above this file, stored in the
    cache, and returned.
    """
    global FLARESOLVERR_VERSION
    if FLARESOLVERR_VERSION is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        package_path = os.path.join(module_dir, os.pardir, 'package.json')
        with open(package_path) as f:
            package_info = json.loads(f.read())
        FLARESOLVERR_VERSION = package_info['version']
    return FLARESOLVERR_VERSION
37 |
38 |
def get_webdriver(req: V1RequestBase = None, retry: int = 0) -> WebDriver:
    """
    Launch an undetected_chromedriver Chrome instance and return it.

    :param req: optional request object. When given, these attributes are
        read: ``beta_args`` (``True`` adds '--auto-open-devtools-for-tabs'),
        ``proxy`` (mapping whose 'url' is passed as '--proxy-server') and
        ``headless`` (truthy adds '--headless'). ``None`` launches with the
        default options only.
    :param retry: number of attempts already made; callers normally leave
        this at 0. A failed launch is retried until ``retry`` reaches 3.
    :return: the started driver.
    :raises Exception: 'Error launching web browser: ...' once every attempt
        has failed; the last underlying error is chained as the cause.

    Side effects: may start the Xvfb display, and on the first launch
    without a known driver path copies the patched chromedriver and records
    its location in PATCHED_DRIVER_PATH so later launches reuse it.
    """
    global PATCHED_DRIVER_PATH
    logging.debug('Launching web browser...')

    # Tracked outside the try block so a browser that started before a later
    # step failed can be shut down before retrying.
    driver = None
    try:
        # undetected_chromedriver
        options = uc.ChromeOptions()
        options.add_argument('--no-sandbox')

        random_w = random.randint(800, 1200)
        random_h = random.randint(600, 800)
        options.add_argument(f'--window-size={random_w},{random_h}')

        # todo: this param shows a warning in chrome head-full
        options.add_argument('--disable-setuid-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
        options.add_argument('--no-zygote')

        # Every request-driven option is guarded by `req is not None`:
        # `req` defaults to None and must not be dereferenced in that case.
        if req is not None:
            # Test
            if req.beta_args is True:
                options.add_argument('--auto-open-devtools-for-tabs')

            # Proxy Support
            if req.proxy is not None:
                proxy = req.proxy['url']
                options.add_argument('--proxy-server=%s' % proxy)

            if req.headless:
                options.add_argument("--headless")

        # note: headless mode is detected (options.headless = True)
        # we launch the browser in head-full mode with the window hidden
        windows_headless = False
        if get_config_headless():
            if (req is not None and req.headless is True) or os.name == 'nt':
                windows_headless = True
            else:
                start_xvfb_display()

        # If we are inside the Docker container, we avoid downloading the driver
        driver_exe_path = None
        version_main = None
        if os.path.exists("/app/chromedriver"):
            # Running inside Docker
            driver_exe_path = "/app/chromedriver"
        else:
            version_main = get_chrome_major_version()
            if PATCHED_DRIVER_PATH is not None:
                driver_exe_path = PATCHED_DRIVER_PATH

        # downloads and patches the chromedriver
        # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
        driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main,
                           windows_headless=windows_headless, start_error_message="")

        # Temporary fix for headless mode
        if windows_headless:
            # Hide the window
            driver.minimize_window()

        # save the patched driver to avoid re-downloads
        if driver_exe_path is None:
            PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
            shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)

        return driver
    except Exception as e:
        # Do not leak a browser that started but failed a later step.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                logging.debug('Could not close the failed browser: %s', quit_error)

        # Retry up to 3 times
        if retry < 3:
            logging.exception(e)
            logging.debug('Retrying...')
            return get_webdriver(req, retry + 1)

        logging.exception(e)
        tb = e.__traceback__
        lineno = tb.tb_lineno
        raise Exception(f'Error launching web browser: {e} (line {lineno})') from e
132 |
133 |
def get_chrome_exe_path() -> str:
    """Return the Chrome executable path as reported by ``uc.find_chrome_executable()``."""
    chrome_path = uc.find_chrome_executable()
    return chrome_path
136 |
137 |
def get_chrome_major_version() -> str:
    """
    Return the major version of the installed Chrome as a string, e.g. '104'.

    The value is computed once and cached in CHROME_MAJOR_VERSION.

    On Windows (``os.name == 'nt'``) the full version comes from the Chrome
    uninstall registry key; if that lookup raises or yields nothing, the
    Chrome application folder is scanned instead. On other systems the
    output of ``"<chrome>" --version`` is used.
    """
    global CHROME_MAJOR_VERSION
    if CHROME_MAJOR_VERSION is not None:
        return CHROME_MAJOR_VERSION

    if os.name == 'nt':
        try:
            # The context manager closes the pipe, like the non-Windows branch.
            with os.popen(
                'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"'
            ) as stream:
                output = stream.read()
            # Example: '104.0.5112.79'
            complete_version = extract_version_registry(output)

        # noinspection PyBroadException
        except Exception:
            # Example: '104.0.5112.79'
            complete_version = extract_version_folder()
        else:
            # An empty registry result is as useless as a failed query.
            if not complete_version:
                complete_version = extract_version_folder()
    else:
        chrome_path = uc.find_chrome_executable()
        with os.popen(f'"{chrome_path}" --version') as process:
            # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
            # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
            complete_version = process.read()

    # Text before the first '.', then its last space-separated word:
    # 'Google Chrome 104.0.5112.79' -> 'Google Chrome 104' -> '104'
    CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
    return CHROME_MAJOR_VERSION
165 |
166 |
def extract_version_registry(output) -> str:
    """
    Extract the Chrome version from the text printed by ``reg query``.

    The value of the last ``DisplayVersion  REG_SZ  <value>`` line is
    returned with surrounding whitespace removed. Any run of spaces or tabs
    between the columns is accepted, so the result does not depend on a
    fixed character offset.

    :param output: text produced by the registry query
    :return: the version string, e.g. '104.0.5112.79'; '' when ``output``
             is not text (the pattern search raises TypeError)
    :raises ValueError: when no DisplayVersion line is present, so callers
             can fall back to another detection method
    """
    try:
        versions = re.findall(r'DisplayVersion[ \t]+REG_SZ[ \t]+([^\r\n]*)', output)
    except TypeError:
        return ''
    if not versions:
        raise ValueError('DisplayVersion REG_SZ entry not found in registry output')
    return versions[-1].strip()
178 |
179 |
def extract_version_folder(search_dirs=None) -> str:
    """
    Find a Chrome version by looking for a version-named sub-folder.

    :param search_dirs: optional iterable of directories to inspect, in
        order. Defaults to the Chrome 'Application' folder under
        'C:\\Program Files' and then 'C:\\Program Files (x86)'.
    :return: the first 'N.N.N.N' version found in the name of a
        sub-directory, e.g. '104.0.5112.79'; '' when none is found.
        Directories that do not exist are skipped and plain files are
        ignored.
    """
    if search_dirs is None:
        # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
        search_dirs = (
            'C:\\Program Files\\Google\\Chrome\\Application',
            'C:\\Program Files (x86)\\Google\\Chrome\\Application',
        )

    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    version_pattern = re.compile(r'\d+\.\d+\.\d+\.\d+')

    for base_dir in search_dirs:
        if not os.path.isdir(base_dir):
            continue
        # The context manager closes the directory iterator on early return.
        with os.scandir(base_dir) as entries:
            for entry in entries:
                if not entry.is_dir():
                    continue
                match = version_pattern.search(entry.name)
                if match:
                    # Found a Chrome version.
                    return match.group(0)
    return ''
194 |
195 |
def get_user_agent(driver=None) -> str:
    """
    Return the browser's User-Agent string, caching it in USER_AGENT.

    :param driver: optional, already running driver to query. When omitted,
        a temporary headless browser is launched through ``get_webdriver``
        and closed again before returning.
    :return: the value of ``navigator.userAgent``
    :raises Exception: 'Error getting browser User-Agent. ...' when the
        browser cannot be started or queried; the original error is chained.

    A driver passed in by the caller is never closed here: it belongs to
    the caller, who may still need it after this call.
    """
    global USER_AGENT
    if USER_AGENT is not None:
        return USER_AGENT

    # Only a browser launched by this function may be shut down by it.
    owns_driver = driver is None
    try:
        if owns_driver:
            req = V1RequestBase(_dict={})
            req.headless = True
            driver = get_webdriver(req=req)

        USER_AGENT = driver.execute_script("return navigator.userAgent")
        return USER_AGENT
    except Exception as e:
        raise Exception("Error getting browser User-Agent. " + str(e)) from e
    finally:
        if owns_driver and driver is not None:
            driver.quit()
214 |
215 |
def start_xvfb_display():
    """
    Start an Xvfb display once and remember it in XVFB_DISPLAY.

    Calls made while XVFB_DISPLAY is already set do nothing. ``xvfbwrapper``
    is imported only when a display actually has to be created.
    """
    global XVFB_DISPLAY
    if XVFB_DISPLAY is not None:
        return

    from xvfbwrapper import Xvfb
    display = Xvfb()
    XVFB_DISPLAY = display
    display.start()
222 |
223 |
def object_to_dict(_object):
    """
    Convert an object into plain JSON-compatible data via a JSON round trip.

    Values that ``json`` cannot serialise natively are replaced by their
    ``__dict__``. Top-level keys starting with a double underscore are
    dropped from the result; nested keys are left untouched.
    """
    def _instance_fields(o):
        return o.__dict__

    serialized = json.dumps(_object, default=_instance_fields)
    # remove hidden fields
    return {
        key: value
        for key, value in json.loads(serialized).items()
        if not key.startswith('__')
    }
228 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | WebTest==3.0.0
2 |
--------------------------------------------------------------------------------