├── .dockerignore ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── UPDATE_FILE ├── docker-compose.yml ├── html_samples ├── cloudflare_captcha_hcaptcha_v1.html ├── cloudflare_captcha_norobot_v1.html ├── cloudflare_init_v1.html └── cloudflare_spinner_v1.html ├── package.json ├── requirements.txt ├── resources ├── flaresolverr_logo.png └── flaresolverr_logo.svg ├── src ├── bottle_plugins │ ├── __init__.py │ ├── error_plugin.py │ └── logger_plugin.py ├── dtos.py ├── flaresolverr.py ├── flaresolverr_service.py ├── sessions.py ├── tests.py ├── tests_sites.py ├── undetected_chromedriver │ ├── __init__.py │ ├── cdp.py │ ├── devtool.py │ ├── dprocess.py │ ├── options.py │ ├── patcher.py │ ├── reactor.py │ └── webelement.py └── utils.py └── test-requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .github/ 3 | .idea/ 4 | html_samples/ 5 | resources/ 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editors 2 | .vscode/ 3 | .idea/ 4 | 5 | # Vagrant 6 | .vagrant/ 7 | 8 | # Mac/OSX 9 | .DS_Store 10 | 11 | # Windows 12 | Thumbs.db 13 | 14 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v3.0.2 (2023/01/08) 4 | 5 | * Detect Cloudflare blocked access 6 | * Check Chrome / Chromium web browser is installed correctly 7 | 8 | ## v3.0.1 (2023/01/06) 9 | 10 | * Kill Chromium processes properly to avoid defunct/zombie processes 11 | * Update undetected-chromedriver 12 | * Disable Zygote sandbox in Chromium browser 13 | * Add more selectors to detect blocked access 14 | * Include procps (ps), curl and vim packages in the Docker image 15 | 16 | ## v3.0.0 (2023/01/04) 17 | 18 | * This is the first release of 
FlareSolverr v3. There are some breaking changes 19 | * Docker images for linux/386, linux/amd64, linux/arm/v7 and linux/arm64/v8 20 | * Replaced Firefox with Chrome 21 | * Replaced NodeJS / TypeScript with Python 22 | * Replaced Puppeteer with Selenium 23 | * No binaries for Linux / Windows. You have to use the Docker image or install from source code 24 | * No proxy support 25 | * No session support 26 | 27 | ## v2.2.10 (2022/10/22) 28 | 29 | * Detect DDoS-Guard through title content 30 | 31 | ## v2.2.9 (2022/09/25) 32 | 33 | * Detect Cloudflare Access Denied 34 | * Commit the complete changelog 35 | 36 | ## v2.2.8 (2022/09/17) 37 | 38 | * Remove 30 s delay and clean legacy code 39 | 40 | ## v2.2.7 (2022/09/12) 41 | 42 | * Temporary fix: add 30s delay 43 | * Update README.md 44 | 45 | ## v2.2.6 (2022/07/31) 46 | 47 | * Fix Cloudflare detection in POST requests 48 | 49 | ## v2.2.5 (2022/07/30) 50 | 51 | * Update GitHub actions to build executables with NodeJs 16 52 | * Update Cloudflare selectors and add HTML samples 53 | * Install Firefox 94 instead of the latest Nightly 54 | * Update dependencies 55 | * Upgrade Puppeteer (#396) 56 | 57 | ## v2.2.4 (2022/04/17) 58 | 59 | * Detect DDoS-Guard challenge 60 | 61 | ## v2.2.3 (2022/04/16) 62 | 63 | * Fix 2000 ms navigation timeout 64 | * Update README.md (libseccomp2 package in Debian) 65 | * Update README.md (clarify proxy parameter) (#307) 66 | * Update NPM dependencies 67 | * Disable Cloudflare ban detection 68 | 69 | ## v2.2.2 (2022/03/19) 70 | 71 | * Fix ban detection. 
Resolves #330 (#336) 72 | 73 | ## v2.2.1 (2022/02/06) 74 | 75 | * Fix max timeout error in some pages 76 | * Avoid crashing in NodeJS 17 due to Unhandled promise rejection 77 | * Improve proxy validation and debug traces 78 | * Remove @types/puppeteer dependency 79 | 80 | ## v2.2.0 (2022/01/31) 81 | 82 | * Increase default BROWSER_TIMEOUT=40000 (40 seconds) 83 | * Fix Puppeteer deprecation warnings 84 | * Update base Docker image Alpine 3.15 / NodeJS 16 85 | * Build precompiled binaries with NodeJS 16 86 | * Update Puppeteer and other dependencies 87 | * Add support for Custom CloudFlare challenge 88 | * Add support for DDoS-GUARD challenge 89 | 90 | ## v2.1.0 (2021/12/12) 91 | 92 | * Add aarch64 to user agents to be replaced (#248) 93 | * Fix SOCKSv4 and SOCKSv5 proxy. resolves #214 #220 94 | * Remove redundant JSON key (postData) (#242) 95 | * Make test URL configurable with TEST_URL env var. resolves #240 96 | * Bypass new Cloudflare protection 97 | * Update donation links 98 | 99 | ## v2.0.2 (2021/10/31) 100 | 101 | * Fix SOCKS5 proxy. Resolves #214 102 | * Replace Firefox ESR with a newer version 103 | * Catch startup exceptions and give some advice 104 | * Add env var BROWSER_TIMEOUT for slow systems 105 | * Fix NPM warning in Docker images 106 | 107 | ## v2.0.1 (2021/10/24) 108 | 109 | * Check user home dir before testing web browser installation 110 | 111 | ## v2.0.0 (2021/10/20) 112 | 113 | FlareSolverr 2.0.0 is out with some important changes: 114 | 115 | * It is capable of solving the automatic challenges of Cloudflare. CAPTCHAs (hCaptcha) cannot be resolved and the old solvers have been removed. 116 | * The Chrome browser has been replaced by Firefox. This has caused some functionality to be removed. Parameters: `userAgent`, `headers`, `rawHtml` and `download` are no longer available. 117 | * Included `proxy` support without user/password credentials. 
If you are writing your own integration with FlareSolverr, make sure your client uses the same User-Agent header and Proxy that FlareSolverr uses. Those values together with the Cookie are checked and detected by Cloudflare. 118 | * FlareSolverr has been rewritten from scratch. From now on it should be easier to maintain and test. 119 | * If you are using Jackett make sure you have version v0.18.1041 or higher. FlareSolverrSharp v2.0.0 is out too. 120 | 121 | Complete changelog: 122 | 123 | * Bump version 2.0.0 124 | * Set puppeteer timeout half of maxTimeout param. Resolves #180 125 | * Add test for blocked IP 126 | * Avoid reloading the page in case of error 127 | * Improve Cloudflare detection 128 | * Fix version 129 | * Fix browser preferences and proxy 130 | * Fix request.post method and clean error traces 131 | * Use Firefox ESR for Docker images 132 | * Improve Firefox start time and code clean up 133 | * Improve bad request management and tests 134 | * Build native packages with Firefox 135 | * Update readme 136 | * Improve Docker image and clean TODOs 137 | * Add proxy support 138 | * Implement request.post method for Firefox 139 | * Code clean up, remove returnRawHtml, download, headers params 140 | * Remove outdated captcha solvers 141 | * Refactor the app to use Express server and Jest for tests 142 | * Fix Cloudflare resolver for Linux ARM builds 143 | * Fix Cloudflare resolver 144 | * Replace Chrome web browser with Firefox 145 | * Remove userAgent parameter since any modification is detected by CF 146 | * Update dependencies 147 | * Remove Puppeteer stealth plugin 148 | 149 | ## v1.2.9 (2021/08/01) 150 | 151 | * Improve "Execution context was destroyed" error handling 152 | * Implement returnRawHtml parameter. resolves #172 resolves #165 153 | * Capture Docker stop signal. resolves #158 154 | * Reduce Docker image size 20 MB 155 | * Fix page reload after challenge is solved. 
resolves #162 resolves #143 156 | * Avoid loading images/css/fonts to speed up page load 157 | * Improve Cloudflare IP ban detection 158 | * Fix vulnerabilities 159 | 160 | ## v1.2.8 (2021/06/01) 161 | 162 | * Improve old JS challenge waiting. Resolves #129 163 | 164 | ## v1.2.7 (2021/06/01) 165 | 166 | * Improvements in Cloudflare redirect detection. Resolves #140 167 | * Fix installation instructions 168 | 169 | ## v1.2.6 (2021/05/30) 170 | 171 | * Handle new Cloudflare challenge. Resolves #135 Resolves #134 172 | * Provide reference Systemd unit file. Resolves #72 173 | * Fix EACCES: permission denied, open '/tmp/flaresolverr.txt'. Resolves #120 174 | * Configure timezone with TZ env var. Resolves #109 175 | * Return the redirected URL in the response (#126) 176 | * Show an error in hcaptcha-solver. Resolves #132 177 | * Regenerate package-lock.json lockfileVersion 2 178 | * Update issue template. Resolves #130 179 | * Bump ws from 7.4.1 to 7.4.6 (#137) 180 | * Bump hosted-git-info from 2.8.8 to 2.8.9 (#124) 181 | * Bump lodash from 4.17.20 to 4.17.21 (#125) 182 | 183 | ## v1.2.5 (2021/04/05) 184 | 185 | * Fix memory regression, close test browser 186 | * Fix release-docker GitHub action 187 | 188 | ## v1.2.4 (2021/04/04) 189 | 190 | * Include license in release zips. resolves #75 191 | * Validate Chrome is working at startup 192 | * Speedup Docker image build 193 | * Add health check endpoint 194 | * Update issue template 195 | * Minor improvements in debug traces 196 | * Validate environment variables at startup. resolves #101 197 | * Add FlareSolverr logo. resolves #23 198 | 199 | ## v1.2.3 (2021/01/10) 200 | 201 | * CI/CD: Generate release changelog from commits. 
resolves #34 202 | * Update README.md 203 | * Add donation links 204 | * Simplify docker-compose.yml 205 | * Allow to configure "none" captcha resolver 206 | * Override docker-compose.yml variables via .env resolves #64 (#66) 207 | 208 | ## v1.2.2 (2021/01/09) 209 | 210 | * Add documentation for precompiled binaries installation 211 | * Add instructions to set environment variables in Windows 212 | * Build Windows and Linux binaries. resolves #18 213 | * Add release badge in the readme 214 | * CI/CD: Generate release changelog from commits. resolves #34 215 | * Add a notice about captcha solvers 216 | * Add Chrome flag --disable-dev-shm-usage to fix crashes. resolves #45 217 | * Fix Docker CLI documentation 218 | * Add traces with captcha solver service. resolves #39 219 | * Improve logic to detect Cloudflare captcha. resolves #48 220 | * Move Cloudflare provider logic to his own class 221 | * Simplify and document the "return only cookies" parameter 222 | * Show message when debug log is enabled 223 | * Update readme to add more clarifications. resolves #53 (#60) 224 | * issue_template: typo fix (#52) 225 | 226 | ## v1.2.1 (2020/12/20) 227 | 228 | * Change version to match release tag / 1.2.0 => v1.2.0 229 | * CI/CD Publish release in GitHub repository. 
resolves #34 230 | * Add welcome message in / endpoint 231 | * Rewrite request timeout handling (maxTimeout) resolves #42 232 | * Add http status for better logging 233 | * Return an error when no selectors are found, #25 234 | * Add issue template, fix #32 235 | * Moving log.html right after loading the page and add one on reload, fix #30 236 | * Update User-Agent to match chromium version, ref: #15 (#28) 237 | * Update install from source code documentation 238 | * Update readme to add Docker instructions (#20) 239 | * Clean up readme (#19) 240 | * Add docker-compose 241 | * Change default log level to info 242 | 243 | ## v1.2.0 (2020/12/20) 244 | 245 | * Fix User-Agent detected by CouldFlare (Docker ARM) resolves #15 246 | * Include exception message in error response 247 | * CI/CD: Rename GitHub Action build => publish 248 | * Bump version 249 | * Fix TypeScript compilation and bump minor version 250 | * CI/CD: Bump minor version 251 | * CI/CD: Configure GitHub Actions 252 | * CI/CD: Configure GitHub Actions 253 | * CI/CD: Bump minor version 254 | * CI/CD: Configure Build GitHub Action 255 | * CI/CD: Configure AutoTag GitHub Action (#14) 256 | * CI/CD: Build the Docker images with GitHub Actions (#13) 257 | * Update dependencies 258 | * Backport changes from Cloudproxy (#11) 259 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye as builder 2 | 3 | # Build dummy packages to skip installing them and their dependencies 4 | RUN apt-get update \ 5 | && apt-get install -y --no-install-recommends equivs \ 6 | && equivs-control libgl1-mesa-dri \ 7 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: libgl1-mesa-dri\nVersion: 99.0.0\nDescription: Dummy package for libgl1-mesa-dri\n' >> libgl1-mesa-dri \ 8 | && equivs-build libgl1-mesa-dri \ 9 | && mv libgl1-mesa-dri_*.deb 
/libgl1-mesa-dri.deb \ 10 | && equivs-control adwaita-icon-theme \ 11 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: adwaita-icon-theme\nVersion: 99.0.0\nDescription: Dummy package for adwaita-icon-theme\n' >> adwaita-icon-theme \ 12 | && equivs-build adwaita-icon-theme \ 13 | && mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb 14 | 15 | FROM python:3.11-slim-bullseye 16 | 17 | # Copy dummy packages 18 | COPY --from=builder /*.deb / 19 | 20 | # Install dependencies and create flaresolverr user 21 | # You can test Chromium running this command inside the container: 22 | # xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox 23 | # The error traces is like this: "*** stack smashing detected ***: terminated" 24 | # To check the package versions available you can use this command: 25 | # apt-cache madison chromium 26 | WORKDIR /app 27 | # Install dummy packages 28 | RUN dpkg -i /libgl1-mesa-dri.deb \ 29 | && dpkg -i /adwaita-icon-theme.deb \ 30 | # Install dependencies 31 | && apt-get update \ 32 | && apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \ 33 | procps curl vim \ 34 | # Remove temporary files and hardware decoding libraries 35 | && rm -rf /var/lib/apt/lists/* \ 36 | && rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \ 37 | && rm -f /usr/lib/x86_64-linux-gnu/mfx/* \ 38 | # Create flaresolverr user 39 | && useradd --home-dir /app --shell /bin/sh flaresolverr \ 40 | && mv /usr/bin/chromedriver chromedriver \ 41 | && chown -R flaresolverr:flaresolverr . 42 | 43 | # Install Python dependencies 44 | COPY requirements.txt . 45 | RUN pip install -r requirements.txt \ 46 | # Remove temporary files 47 | && rm -rf /root/.cache 48 | 49 | USER flaresolverr 50 | 51 | COPY src . 
52 | 53 | EXPOSE 8191 54 | 55 | # dumb-init avoids zombie chromium processes 56 | ENTRYPOINT ["/usr/bin/dumb-init", "--"] 57 | 58 | CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"] 59 | 60 | # Local build 61 | # docker build -t ngosang/flaresolverr:3.0.0 . 62 | # docker run -p 8191:8191 ngosang/flaresolverr:3.0.0 63 | 64 | # Multi-arch build 65 | # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 66 | # docker buildx create --use 67 | # docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . 68 | # add --push to publish in DockerHub 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Diego Heras (ngosang / ngosang@hotmail.es) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyCFSolver 2 | 3 | [![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/) 4 | 5 | pyCFSolver is a fork of FlareSolverr, which is a proxy server to bypass Cloudflare and DDoS-GUARD protection. 6 | 7 | ### Features 8 | - [x] All V3 FlareSolverr features 9 | - [x] Proxy Support 10 | - [x] Session support (Ty @furdarius & @Xefir) 11 | 12 | ### TODO 13 | - [ ] Add Docker support 14 | 15 | 16 | ## How it works 17 | 18 | pyCFSolver starts a proxy server, and it waits for user requests in an idle state using few resources. 19 | When a request arrives, it uses [Selenium](https://www.selenium.dev) with the 20 | [undetected-chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver) 21 | to create a web browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare challenge 22 | is solved (or the timeout expires). The HTML code and the cookies are sent back to the user, and those cookies can be used to 23 | bypass Cloudflare using other HTTP clients. 24 | 25 | **NOTE**: Web browsers consume a lot of memory. If you are running pyCFSolver on a machine with little RAM, do not make 26 | many requests at once. With each request a new browser is launched. 27 | 28 | It is also possible to use a permanent session. However, if you use sessions, you should make sure to close them as 29 | soon as you are done using them. 30 | 31 | ## Installation 32 | 33 | ### Docker 34 | 35 | Not supported yet. See manual installation. 36 | 37 | ### Precompiled binaries 38 | 39 | Precompiled binaries are not currently available for v3. Please see https://github.com/FlareSolverr/FlareSolverr/issues/660 for updates, 40 | or below for instructions on how to build pyCFSolver from source code. 
41 | 42 | ### From source code 43 | 44 | * Install [Python 3.10](https://www.python.org/downloads/). 45 | * Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser. 46 | * (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package. 47 | * Clone this repository and open a shell in that path. 48 | * Run `pip install -r requirements.txt` command to install pyCFSolver dependencies. 49 | * Run `python src/flaresolverr.py` command to start pyCFSolver. 50 | 51 | ### Systemd service 52 | 53 | We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables. 54 | 55 | ## Usage 56 | 57 | Example request: 58 | ```bash 59 | curl -L -X POST 'http://localhost:8192/v1' \ 60 | -H 'Content-Type: application/json' \ 61 | --data-raw '{ 62 | "cmd": "request.get", 63 | "url":"http://www.google.com/", 64 | "maxTimeout": 60000, 65 | "proxy": {"url": "http://0.0.0.0:8888"} 66 | }' 67 | ``` 68 | 69 | Create a session: 70 | ```bash 71 | curl -L -X POST 'http://localhost:8192/v1' \ 72 | -H 'Content-Type: application/json' \ 73 | --data-raw '{ 74 | "cmd": "sessions.create", 75 | "session": "session_id_1", 76 | "headless": true 77 | }' 78 | ``` 79 | Use a session (`session_ttl_minutes` is the session's time to live in minutes): 80 | ```bash 81 | curl -L -X POST 'http://localhost:8192/v1' \ 82 | -H 'Content-Type: application/json' \ 83 | --data-raw '{ 84 | "cmd": "request.get", 85 | "url":"http://www.google.com/", 86 | "maxTimeout": 60000, 87 | "session": "session_id_1", 88 | "session_ttl_minutes": 10, 89 | "proxy": {"url": "http://0.0.0.0:8888"} 90 | }' 91 | ``` 92 | 93 | Destroy a session: 94 | ```bash 95 | curl -L -X POST 'http://localhost:8192/v1' \ 96 | -H 'Content-Type: application/json' \ 97 | --data-raw '{ 98 | "cmd": "sessions.destroy", 99 | "session": "session_id_1" 100 | }' 101 | ``` 102 | 103 | 104 | ### 
Commands 105 | 106 | #### + `sessions.create` 107 | 108 | This will launch a new browser instance which will retain cookies until you destroy it with `sessions.destroy`. 109 | This comes in handy, so you don't have to keep solving challenges over and over and you won't need to keep sending 110 | cookies for the browser to use. 111 | 112 | This also speeds up the requests since it won't have to launch a new browser instance for every request. 113 | 114 | | Parameter | Notes | 115 | |-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 116 | | session | Optional. The session ID that you want to be assigned to the instance. If it isn't set, a random UUID will be assigned. | 117 | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy scheme in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. | 118 | 119 | #### + `sessions.list` 120 | 121 | Returns a list of all the active sessions. This is mainly useful for debugging, to see how many sessions are running. 122 | You should always make sure to properly close each session when you are done using it, as too many open sessions may slow your 123 | computer down. 124 | 125 | Example response: 126 | 127 | ```json 128 | { 129 | "sessions": [ 130 | "session_id_1", 131 | "session_id_2", 132 | "session_id_3..." 133 | ] 134 | } 135 | ``` 136 | 137 | #### + `sessions.destroy` 138 | 139 | This will properly shut down a browser instance and remove all files associated with it to free up resources for a new 140 | session. When you no longer need to use a session you should make sure to close it. 141 | 142 | | Parameter | Notes | 143 | |-----------|-----------------------------------------------| 144 | | session | The session ID that you want to be destroyed. 
| 145 | 146 | #### + `request.get` 147 | 148 | | Parameter | Notes | 149 | |-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 150 | | url | Mandatory | 151 | | session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. | 152 | | maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. | 153 | | cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. | 154 | | returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. | 155 | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | 156 | 157 | :warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the pyCFSolver User-Agent too. If they don't match you will see the challenge. 
158 | 159 | Example response from running the `curl` above: 160 | 161 | ```json 162 | { 163 | "solution": { 164 | "url": "https://www.google.com/?gws_rd=ssl", 165 | "status": 200, 166 | "headers": { 167 | "status": "200", 168 | "date": "Thu, 16 Jul 2020 04:15:49 GMT", 169 | "expires": "-1", 170 | "cache-control": "private, max-age=0", 171 | "content-type": "text/html; charset=UTF-8", 172 | "strict-transport-security": "max-age=31536000", 173 | "p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"", 174 | "content-encoding": "br", 175 | "server": "gws", 176 | "content-length": "61587", 177 | "x-xss-protection": "0", 178 | "x-frame-options": "SAMEORIGIN", 179 | "set-cookie": "1P_JAR=2020-07-16-04; expires=Sat..." 180 | }, 181 | "response":"...", 182 | "cookies": [ 183 | { 184 | "name": "NID", 185 | "value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57...", 186 | "domain": ".google.com", 187 | "path": "/", 188 | "expires": 1610684149.307722, 189 | "size": 178, 190 | "httpOnly": true, 191 | "secure": true, 192 | "session": false, 193 | "sameSite": "None" 194 | }, 195 | { 196 | "name": "1P_JAR", 197 | "value": "2020-07-16-04", 198 | "domain": ".google.com", 199 | "path": "/", 200 | "expires": 1597464949.307626, 201 | "size": 19, 202 | "httpOnly": false, 203 | "secure": true, 204 | "session": false, 205 | "sameSite": "None" 206 | } 207 | ], 208 | "userAgent": "Windows NT 10.0; Win64; x64) AppleWebKit/5..." 209 | }, 210 | "status": "ok", 211 | "message": "", 212 | "startTimestamp": 1594872947467, 213 | "endTimestamp": 1594872949617, 214 | "version": "1.0.0" 215 | } 216 | ``` 217 | 218 | ### + `request.post` 219 | 220 | This is the same as `request.get` but it takes one more param: 221 | 222 | | Parameter | Notes | 223 | |-----------|--------------------------------------------------------------------------| 224 | | postData | Must be a string with `application/x-www-form-urlencoded`. 
Eg: `a=b&c=d` | 225 | 226 | ## Environment variables 227 | 228 | | Name | Default | Notes | 229 | |-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| 230 | | LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. | 231 | | LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. | 232 | | CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. | 233 | | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | 234 | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | 235 | | BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. | 236 | | TEST_URL | https://www.google.com | pyCFSolver makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | 237 | | PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. | 238 | | HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. | 239 | 240 | Environment variables are set differently depending on the operating system. Some examples: 241 | * Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command. 242 | * Linux: Run `export LOG_LEVEL=debug` and then start pyCFSolver in the same shell. 243 | * Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start pyCFSolver in the same shell. 
244 | 245 | ## Captcha Solvers 246 | 247 | :warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome. 248 | 249 | Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to 250 | solve a captcha. 251 | If this is the case, pyCFSolver will return the error `Captcha detected but no automatic solver is configured.` 252 | 253 | pyCFSolver can be customized to solve the captcha automatically by setting the environment variable `CAPTCHA_SOLVER` 254 | to the file name of one of the adapters inside the [/captcha](src/captcha) directory. 255 | 256 | ## Related projects 257 | 258 | * C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp 259 | -------------------------------------------------------------------------------- /UPDATE_FILE: -------------------------------------------------------------------------------- 1 | project:PyCFSolver 2 | version:1.0.9 3 | overwrite:src/__init__.py 4 | overwrite:src/flaresolverr.py 5 | overwrite:src/flaresolverr_service.py 6 | overwrite:src/sessions.py 7 | overwrite:src/utils.py 8 | overwrite:src/dtos.py 9 | overwrite:src/undetected_chromedriver/patcher.py -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "2.1" 3 | services: 4 | flaresolverr: 5 | # DockerHub mirror flaresolverr/flaresolverr:latest 6 | image: ghcr.io/flaresolverr/flaresolverr:latest 7 | container_name: flaresolverr 8 | environment: 9 | - LOG_LEVEL=${LOG_LEVEL:-info} 10 | - LOG_HTML=${LOG_HTML:-false} 11 | - CAPTCHA_SOLVER=${CAPTCHA_SOLVER:-none} 12 | - TZ=Europe/London 13 | ports: 14 | - "${PORT:-8191}:8191" 15 | restart: unless-stopped 16 | -------------------------------------------------------------------------------- /html_samples/cloudflare_captcha_hcaptcha_v1.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Just a moment... 6 | 7 | 8 | 9 | 10 | 11 | 12 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 | 51 | Privacy Pass 52 | 53 |
54 |
55 |
56 |
57 | 58 |
59 |
60 |

61 | 63 | 0MAGNET.COM 64 |

65 |

66 | Checking if the site connection is secure 67 |

68 |
69 |
70 | 76 |
82 |
83 |
84 | 92 | 104 |
106 |
107 |
108 | 0magnet.com needs to review the security of your connection before 109 | proceeding. 110 |
111 | 116 | 128 | 134 |
137 | 139 | 141 | 142 | error code: 143 | 1020 144 |
145 |
146 |
147 | 167 | 168 | 179 | 180 | 181 | 199 | 217 | 218 | 219 | -------------------------------------------------------------------------------- /html_samples/cloudflare_captcha_norobot_v1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Just a moment... 6 | 7 | 8 | 9 | 10 | 11 | 12 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 | 51 | Privacy Pass 52 | 53 |
54 |
55 |
56 |
57 | 58 |
59 |
60 |

61 | 63 | 0MAGNET.COM 64 |

65 |

66 | Checking if the site connection is secure 67 |

68 |
69 |
71 |
72 | 80 | 92 |
94 |
95 |
96 | 0magnet.com needs to review the security of your connection before 97 | proceeding. 98 |
99 | 103 | 115 | 121 |
124 | 126 | 128 | 129 | 131 |
132 |
133 |
134 | 154 | 155 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /html_samples/cloudflare_init_v1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Just a moment... 6 | 7 | 8 | 9 | 10 | 11 | 12 | 41 | 42 | 43 | 44 | 45 | 46 |
47 |
48 |

49 | 51 | 0MAGNET.COM 52 |

53 |

54 | Checking if the site connection is secure 55 |

56 | 68 |
70 |
71 |
72 | 0magnet.com needs to review the security of your connection before 73 | proceeding. 74 |
75 |
78 | 80 | 82 | 83 |
84 |
85 |
86 | 106 | 107 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /html_samples/cloudflare_spinner_v1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Just a moment... 5 | 6 | 7 | 8 | 9 | 10 | 11 | 40 | 41 | 42 | 44 | 45 | 46 | 47 | 48 |
49 | 52 | Privacy Pass 53 | 54 |
55 |
56 |
57 |
58 | 59 |
60 |
61 |

62 | 64 | 0MAGNET.COM 65 |

66 |

67 | Checking if the site connection is secure 68 |

69 | 70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 | 90 |
92 |
93 |
94 | 0magnet.com needs to review the security of your connection before 95 | proceeding. 96 |
97 | 101 | 112 | 118 |
121 | 123 | 125 | 126 | error code: 127 | 1020 128 |
129 |
130 |
131 | 151 | 152 | 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "flaresolverr", 3 | "version": "3.0.2", 4 | "description": "Proxy server to bypass Cloudflare protection", 5 | "author": "Diego Heras (ngosang / ngosang@hotmail.es)", 6 | "license": "MIT" 7 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bottle==0.12.23 2 | waitress==2.1.2 3 | selenium==4.7.2 4 | func-timeout==4.3.5 5 | # required by undetected_chromedriver 6 | requests==2.28.1 7 | websockets==10.4 8 | # only required for linux 9 | xvfbwrapper==0.2.9 10 | -------------------------------------------------------------------------------- /resources/flaresolverr_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rawandahmad698/pyCFSolver/b425a5de0945006a95c80c6f5479f14a24990587/resources/flaresolverr_logo.png -------------------------------------------------------------------------------- /resources/flaresolverr_logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | 24 | 44 | 46 | 47 | 49 | image/svg+xml 50 | 52 | 53 | 54 | 55 | 56 | 61 | 64 | 66 | 68 | 70 | 72 | 74 | 76 | 78 | 80 | 82 | 84 | 86 | 88 | 90 | 92 | 94 | 97 | 99 | 102 | 107 | 112 | 113 | 114 | 115 | 119 | 122 | 127 | 132 | 133 | 136 | 139 | 142 | 145 | 148 | 151 | 154 | 157 | 160 | 163 | 166 | 169 | 172 | 175 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /src/bottle_plugins/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rawandahmad698/pyCFSolver/b425a5de0945006a95c80c6f5479f14a24990587/src/bottle_plugins/__init__.py -------------------------------------------------------------------------------- /src/bottle_plugins/error_plugin.py: -------------------------------------------------------------------------------- 1 | from bottle import response 2 | import logging 3 | 4 | 5 | def error_plugin(callback): 6 | """ 7 | Bottle plugin to handle exceptions 8 | https://stackoverflow.com/a/32764250 9 | """ 10 | 11 | def wrapper(*args, **kwargs): 12 | try: 13 | actual_response = callback(*args, **kwargs) 14 | except Exception as e: 15 | logging.error(str(e)) 16 | actual_response = { 17 | "error": str(e) 18 | } 19 | response.status = 500 20 | return actual_response 21 | 22 | return wrapper 23 | -------------------------------------------------------------------------------- /src/bottle_plugins/logger_plugin.py: -------------------------------------------------------------------------------- 1 | from bottle import request, response 2 | import logging 3 | 4 | 5 | def logger_plugin(callback): 6 | """ 7 | Bottle plugin to use logging module 8 | http://bottlepy.org/docs/dev/plugindev.html 9 | 10 | Wrap a Bottle request so that a log line is emitted after it's handled. 11 | (This decorator can be extended to take the desired logger as a param.) 
12 | """ 13 | 14 | def wrapper(*args, **kwargs): 15 | actual_response = callback(*args, **kwargs) 16 | if not request.url.endswith("/health"): 17 | logging.info('%s %s %s %s' % (request.remote_addr, 18 | request.method, 19 | request.url, 20 | response.status)) 21 | return actual_response 22 | 23 | return wrapper 24 | -------------------------------------------------------------------------------- /src/dtos.py: -------------------------------------------------------------------------------- 1 | 2 | STATUS_OK = "ok" 3 | STATUS_ERROR = "error" 4 | 5 | 6 | class ChallengeResolutionResultT: 7 | url: str = None 8 | status: int = None 9 | headers: list = None 10 | response: str = None 11 | cookies: list = None 12 | userAgent: str = None 13 | 14 | def __init__(self, _dict): 15 | self.__dict__.update(_dict) 16 | 17 | 18 | class ChallengeResolutionT: 19 | status: str = None 20 | message: str = None 21 | result: ChallengeResolutionResultT = None 22 | 23 | def __init__(self, _dict): 24 | self.__dict__.update(_dict) 25 | if self.result is not None: 26 | self.result = ChallengeResolutionResultT(self.result) 27 | 28 | 29 | class V1RequestBase(object): 30 | # V1RequestBase 31 | cmd: str = None 32 | cookies: list = None 33 | maxTimeout: int = None 34 | proxy: dict = None 35 | session_ttl_minutes: int = None 36 | headless: bool = None 37 | delay: int = None 38 | beta_args: bool = None 39 | session: str = None 40 | headers: list = None # deprecated v2.0.0, not used 41 | userAgent: str = None # deprecated v2.0.0, not used 42 | 43 | # V1Request 44 | url: str = None 45 | postData: str = None 46 | returnOnlyCookies: bool = None 47 | download: bool = None # deprecated v2.0.0, not used 48 | returnRawHtml: bool = None # deprecated v2.0.0, not used 49 | 50 | def __init__(self, _dict): 51 | self.__dict__.update(_dict) 52 | 53 | 54 | class V1ResponseBase(object): 55 | # V1ResponseBase 56 | status: str = None 57 | message: str = None 58 | startTimestamp: int = None 59 | endTimestamp: int = None 
60 | version: str = None 61 | 62 | # V1ResponseSolution 63 | solution: ChallengeResolutionResultT = None 64 | 65 | # hidden vars 66 | __error_500__: bool = False 67 | 68 | def __init__(self, _dict): 69 | self.__dict__.update(_dict) 70 | if self.solution is not None: 71 | self.solution = ChallengeResolutionResultT(self.solution) 72 | 73 | 74 | class IndexResponse(object): 75 | msg: str = None 76 | version: str = None 77 | userAgent: str = None 78 | 79 | def __init__(self, _dict): 80 | self.__dict__.update(_dict) 81 | 82 | 83 | class HealthResponse(object): 84 | status: str = None 85 | 86 | def __init__(self, _dict): 87 | self.__dict__.update(_dict) 88 | -------------------------------------------------------------------------------- /src/flaresolverr.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import sys 5 | 6 | from bottle import run, response, Bottle, request 7 | 8 | from bottle_plugins.error_plugin import error_plugin 9 | from bottle_plugins.logger_plugin import logger_plugin 10 | from dtos import IndexResponse, V1RequestBase 11 | import flaresolverr_service 12 | import utils 13 | 14 | envi = "prod" 15 | 16 | 17 | class JSONErrorBottle(Bottle): 18 | """ 19 | Handle 404 errors 20 | """ 21 | def default_error_handler(self, res): 22 | response.content_type = 'application/json' 23 | return json.dumps(dict(error=res.body, status_code=res.status_code)) 24 | 25 | 26 | app = JSONErrorBottle() 27 | 28 | # plugin order is important 29 | app.install(logger_plugin) 30 | app.install(error_plugin) 31 | 32 | 33 | @app.route('/') 34 | def index(): 35 | """ 36 | Show welcome message 37 | """ 38 | res = flaresolverr_service.index_endpoint() 39 | return utils.object_to_dict(res) 40 | 41 | 42 | @app.route('/health') 43 | def health(): 44 | """ 45 | Healthcheck endpoint. 
46 | This endpoint is special because it doesn't print traces 47 | """ 48 | res = flaresolverr_service.health_endpoint() 49 | return utils.object_to_dict(res) 50 | 51 | 52 | @app.post('/v1') 53 | def controller_v1(): 54 | """ 55 | Controller v1 56 | """ 57 | req = V1RequestBase(request.json) 58 | res = flaresolverr_service.controller_v1_endpoint(req) 59 | if res.__error_500__: 60 | response.status = 500 61 | return utils.object_to_dict(res) 62 | 63 | 64 | if __name__ == "__main__": 65 | # validate configuration 66 | log_level = os.environ.get('LOG_LEVEL', 'info').upper() 67 | log_html = utils.get_config_log_html() 68 | headless = utils.get_config_headless() 69 | server_host = os.environ.get('HOST', '0.0.0.0') 70 | port = int(os.environ.get('PORT', 8191)) if envi == "dev" else 8192 71 | server_port = port 72 | 73 | # configure logger 74 | logger_format = '%(asctime)s %(levelname)-8s %(message)s' 75 | if log_level == 'DEBUG': 76 | logger_format = '%(asctime)s %(levelname)-8s ReqId %(thread)s %(message)s' 77 | 78 | logging.basicConfig( 79 | format=logger_format, 80 | level=log_level, 81 | datefmt='%Y-%m-%d %H:%M:%S', 82 | handlers=[ 83 | logging.StreamHandler(sys.stdout) 84 | ] 85 | ) 86 | # disable warning traces from urllib3 87 | logging.getLogger('urllib3').setLevel(logging.ERROR) 88 | logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.WARNING) 89 | logging.getLogger('undetected_chromedriver').setLevel(logging.WARNING) 90 | 91 | logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}') 92 | logging.debug('Debug log enabled') 93 | 94 | # test browser installation 95 | flaresolverr_service.test_browser_installation() 96 | 97 | # start webserver 98 | # default server 'wsgiref' does not support concurrent requests 99 | run(app, host=server_host, port=server_port, quiet=True, server='waitress') 100 | -------------------------------------------------------------------------------- /src/flaresolverr_service.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import platform 3 | import sys 4 | import time 5 | import traceback 6 | from datetime import timedelta 7 | from urllib.parse import unquote 8 | 9 | from func_timeout import FunctionTimedOut, func_timeout 10 | # Import timeout exception from selenium 11 | from selenium.common.exceptions import TimeoutException 12 | from selenium.webdriver.chrome.webdriver import WebDriver 13 | from selenium.webdriver.common.by import By 14 | from selenium.webdriver.support.expected_conditions import ( 15 | presence_of_element_located, staleness_of, title_is) 16 | from selenium.webdriver.common.action_chains import ActionChains 17 | from selenium.webdriver.support.wait import WebDriverWait 18 | 19 | import utils 20 | from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT, 21 | ChallengeResolutionT, HealthResponse, IndexResponse, 22 | V1RequestBase, V1ResponseBase) 23 | 24 | from flaresolverr import envi 25 | 26 | from sessions import SessionsStorage 27 | 28 | ACCESS_DENIED_TITLES = [ 29 | # Cloudflare 30 | 'Access denied', 31 | # Cloudflare http://bitturk.net/ Firefox 32 | 'Attention Required! 
| Cloudflare' 33 | ] 34 | ACCESS_DENIED_SELECTORS = [ 35 | # Cloudflare 36 | 'div.cf-error-title span.cf-code-label span', 37 | # Cloudflare http://bitturk.net/ Firefox 38 | '#cf-error-details div.cf-error-overview h1' 39 | ] 40 | CHALLENGE_TITLES = [ 41 | # Cloudflare 42 | 'Just a moment...', 43 | # DDoS-GUARD 44 | 'DDoS-Guard' 45 | ] 46 | CHALLENGE_SELECTORS = [ 47 | # Cloudflare 48 | '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', 49 | # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands 50 | 'td.info #js_info', 51 | # Fairlane / pararius.com 52 | 'div.vc div.text-box h2' 53 | ] 54 | SHORT_TIMEOUT = 10 55 | SESSIONS_STORAGE = SessionsStorage() 56 | 57 | 58 | def test_browser_installation(): 59 | logging.info("Testing web browser installation...") 60 | logging.info("Platform: " + platform.platform()) 61 | 62 | chrome_exe_path = utils.get_chrome_exe_path() 63 | if chrome_exe_path is None: 64 | logging.error("Chrome / Chromium web browser not installed!") 65 | sys.exit(1) 66 | else: 67 | logging.info("Chrome / Chromium path: " + chrome_exe_path) 68 | 69 | chrome_major_version = utils.get_chrome_major_version() 70 | if chrome_major_version == '': 71 | logging.error("Chrome / Chromium version not detected!") 72 | sys.exit(1) 73 | else: 74 | logging.info("Chrome / Chromium major version: " + chrome_major_version) 75 | 76 | logging.info("Launching web browser...") 77 | user_agent = utils.get_user_agent() 78 | logging.info("FlareSolverr User-Agent: " + user_agent) 79 | logging.info("Test successful!") 80 | 81 | 82 | def index_endpoint() -> IndexResponse: 83 | res = IndexResponse({}) 84 | res.msg = "FlareSolverr is ready!" 
85 | res.version = utils.get_flaresolverr_version() 86 | res.userAgent = utils.get_user_agent() 87 | return res 88 | 89 | 90 | def health_endpoint() -> HealthResponse: 91 | res = HealthResponse({}) 92 | res.status = STATUS_OK 93 | return res 94 | 95 | 96 | def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase: 97 | start_ts = int(time.time() * 1000) 98 | logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}") 99 | res: V1ResponseBase 100 | 101 | if envi == "dev": 102 | req.headless = True 103 | 104 | try: 105 | res = _controller_v1_handler(req) 106 | except Exception as e: 107 | res = V1ResponseBase({}) 108 | res.__error_500__ = True 109 | res.status = STATUS_ERROR 110 | res.message = "X=Error: " + str(e) 111 | logging.error(res.message) 112 | # Get the traceback and log it 113 | tb = traceback.format_exc() 114 | print(tb) 115 | 116 | res.startTimestamp = start_ts 117 | res.endTimestamp = int(time.time() * 1000) 118 | res.version = utils.get_flaresolverr_version() 119 | logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(res)}") 120 | logging.info(f"Response in {(res.endTimestamp - res.startTimestamp) / 1000} s") 121 | return res 122 | 123 | 124 | def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase: 125 | # do some validations 126 | if req.cmd is None: 127 | raise Exception("Request parameter 'cmd' is mandatory.") 128 | if req.headers is not None: 129 | logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.") 130 | if req.userAgent is not None: 131 | logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.") 132 | 133 | # set default values 134 | if req.maxTimeout is None or req.maxTimeout < 1: 135 | req.maxTimeout = 60000 136 | 137 | # execute the command 138 | res: V1ResponseBase 139 | if req.cmd == 'sessions.create': 140 | res = _cmd_sessions_create(req) 141 | elif req.cmd == 'sessions.list': 142 | res = _cmd_sessions_list(req) 143 | elif req.cmd == 
'sessions.destroy': 144 | res = _cmd_sessions_destroy(req) 145 | elif req.cmd == 'request.get': 146 | res = _cmd_request_get(req) 147 | elif req.cmd == 'request.post': 148 | res = _cmd_request_post(req) 149 | else: 150 | raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.") 151 | 152 | return res 153 | 154 | 155 | def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase: 156 | # do some validations 157 | if req.url is None: 158 | raise Exception("Request parameter 'url' is mandatory in 'request.get' command.") 159 | if req.postData is not None: 160 | raise Exception("Cannot use 'postData' when sending a GET request.") 161 | if req.returnRawHtml is not None: 162 | logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") 163 | if req.download is not None: 164 | logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") 165 | 166 | if req.headless: 167 | logging.info("Headless mode is enabled.") 168 | 169 | challenge_res = _resolve_challenge(req, 'GET') 170 | if challenge_res is None: 171 | res = V1ResponseBase({}) 172 | res.status = STATUS_ERROR 173 | res.message = "An error occurred while resolving the challenge."
174 | return res 175 | 176 | res = V1ResponseBase({}) 177 | res.status = challenge_res.status 178 | res.message = challenge_res.message 179 | res.solution = challenge_res.result 180 | return res 181 | 182 | 183 | def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase: 184 | # do some validations 185 | if req.postData is None: 186 | raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.") 187 | if req.returnRawHtml is not None: 188 | logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") 189 | if req.download is not None: 190 | logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") 191 | 192 | challenge_res = _resolve_challenge(req, 'POST') 193 | res = V1ResponseBase({}) 194 | res.status = challenge_res.status 195 | res.message = challenge_res.message 196 | res.solution = challenge_res.result 197 | return res 198 | 199 | 200 | def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase: 201 | logging.debug("Creating new session...") 202 | session, fresh = SESSIONS_STORAGE.create(req=req, session_id=req.session) 203 | session_id = session.session_id 204 | 205 | if not fresh: 206 | return V1ResponseBase({ 207 | "status": STATUS_OK, 208 | "message": "Session already exists.", 209 | "session": session_id 210 | }) 211 | 212 | return V1ResponseBase({ 213 | "status": STATUS_OK, 214 | "message": "Session created successfully.", 215 | "session": session_id 216 | }) 217 | 218 | 219 | def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase: 220 | session_ids = SESSIONS_STORAGE.session_ids() 221 | 222 | return V1ResponseBase({ 223 | "status": STATUS_OK, 224 | "message": "", 225 | "sessions": session_ids 226 | }) 227 | 228 | 229 | def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase: 230 | try: 231 | session_id = req.session 232 | except Exception as e: 233 | print("Error: " + str(e)) 234 | return V1ResponseBase({ 235 | "status": STATUS_ERROR, 236 | "message": str(e) 237 
| }) 238 | 239 | existed = SESSIONS_STORAGE.destroy(session_id) 240 | 241 | if not existed: 242 | raise Exception("The session doesn't exist.") 243 | 244 | logging.info(f"Session destroyed (session_id={session_id})") 245 | 246 | return V1ResponseBase({ 247 | "status": STATUS_OK, 248 | "message": "The session has been removed." 249 | }) 250 | 251 | 252 | def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: 253 | timeout = req.maxTimeout / 1000 254 | driver = None 255 | try: 256 | if req.session: 257 | session_id = req.session 258 | ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None 259 | logging.debug(f"Trying to get session (session_id={session_id}, ttl={str(ttl)})") 260 | isb = session_id in SESSIONS_STORAGE.is_being_created 261 | if isb: 262 | # If you want to not wait for the session to be created, uncomment the following lines 263 | # logging.error(f"Session is being created (session_id={session_id})") 264 | # res = ChallengeResolutionT({}) 265 | # res.status = STATUS_ERROR 266 | # res.message = "Session is being created...." 267 | # return res 268 | logging.info(f"Waiting for session to be created (session_id={session_id})") 269 | 270 | session, fresh = SESSIONS_STORAGE.get(session_id, ttl, req=req) 271 | if fresh: 272 | logging.info(f"new session created to perform the request (session_id={session_id})") 273 | else: 274 | logging.info(f"existing session is used to perform the request (session_id={session_id}, " 275 | f"lifetime={str(session.lifetime())}, ttl={str(ttl)})") 276 | 277 | driver = session.driver 278 | else: 279 | driver = utils.get_webdriver(req=req) 280 | logging.debug('New instance of webdriver has been created to perform the request') 281 | return func_timeout(timeout, _evil_logic, (req, driver, method)) 282 | except FunctionTimedOut: 283 | raise Exception(f'Error solving the challenge. 
Timeout after {timeout} seconds.') 284 | except Exception as e: 285 | # Get error line number 286 | tb = traceback.format_exc() 287 | line_number = tb.split('File')[1].split(',')[1].split(')')[0] 288 | print(f'Error on line {line_number} in file {__file__}') 289 | # raise Exception('Error solving the challenge. ' + str(e)) 290 | print('Error solving the challenge. ' + str(e)) 291 | print(f'Traceback: {tb}') 292 | finally: 293 | if not req.session: 294 | if driver: 295 | driver.quit() 296 | else: 297 | logging.debug('No instance of webdriver has been created to perform the request') 298 | logging.debug('A used instance of webdriver has been destroyed') 299 | 300 | 301 | def click_verify(driver: WebDriver): 302 | try: 303 | logging.debug("Try to find the Cloudflare verify checkbox") 304 | iframe = driver.find_element(By.XPATH, "//iframe[@title='Widget containing a Cloudflare security challenge']") 305 | driver.switch_to.frame(iframe) 306 | checkbox = driver.find_element( 307 | by=By.XPATH, 308 | value='//*[@id="challenge-stage"]/div/label/input', 309 | ) 310 | if checkbox: 311 | actions = ActionChains(driver) 312 | actions.move_to_element_with_offset(checkbox, 5, 7) 313 | actions.click(checkbox) 314 | actions.perform() 315 | logging.debug("Cloudflare verify checkbox found and clicked") 316 | except Exception as e: 317 | logging.exception(e) 318 | logging.debug("Cloudflare verify checkbox not found on the page") 319 | finally: 320 | driver.switch_to.default_content() 321 | 322 | try: 323 | logging.debug("Try to find the Cloudflare 'Verify you are human' button") 324 | button = driver.find_element( 325 | by=By.XPATH, 326 | value="//input[@type='button' and @value='Verify you are human']", 327 | ) 328 | if button: 329 | actions = ActionChains(driver) 330 | actions.move_to_element_with_offset(button, 5, 7) 331 | actions.click(button) 332 | actions.perform() 333 | logging.debug("The Cloudflare 'Verify you are human' button found and clicked") 334 | except Exception as e: 
335 | logging.debug("The Cloudflare 'Verify you are human' button not found on the page") 336 | # print(e) 337 | 338 | time.sleep(2) 339 | 340 | 341 | def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: 342 | res = ChallengeResolutionT({}) 343 | res.status = STATUS_OK 344 | res.message = "" 345 | 346 | # navigate to the page 347 | logging.debug(f'Navigating to... {req.url}') 348 | if method == 'POST': 349 | _post_request(req, driver) 350 | else: 351 | driver.get(req.url) 352 | if utils.get_config_log_html(): 353 | logging.debug(f"Response HTML:\n{driver.page_source}") 354 | 355 | if req.delay: 356 | if isinstance(req.delay, int): 357 | time.sleep(req.delay) 358 | else: 359 | time.sleep(int(req.delay)) 360 | 361 | # wait for the page 362 | html_element = driver.find_element(By.TAG_NAME, "html") 363 | page_title = driver.title 364 | 365 | # find access denied titles 366 | for title in ACCESS_DENIED_TITLES: 367 | if title == page_title: 368 | logging.info("Access denied detected. Refreshing page...") 369 | # refresh page 370 | driver.refresh() 371 | time.sleep(3) 372 | page_title = driver.title 373 | if title == page_title: 374 | raise Exception('Cloudflare has blocked this request. ' 375 | 'Probably your IP is banned for this site, check in your web browser.') 376 | 377 | # find access denied selectors 378 | for selector in ACCESS_DENIED_SELECTORS: 379 | found_elements = driver.find_elements(By.CSS_SELECTOR, selector) 380 | if len(found_elements) > 0: 381 | raise Exception('Cloudflare has blocked this request. ' 382 | 'Probably your IP is banned for this site, check in your web browser.') 383 | 384 | # find challenge by title 385 | challenge_found = False 386 | for title in CHALLENGE_TITLES: 387 | if len(page_title) == 0: 388 | challenge_found = True 389 | logging.info("Challenge detected. 
Title is empty") 390 | break 391 | if title.lower() == page_title.lower(): 392 | challenge_found = True 393 | logging.info("Challenge detected. Title found: " + page_title) 394 | break 395 | 396 | if not challenge_found: 397 | # find challenge by selectors 398 | for selector in CHALLENGE_SELECTORS: 399 | found_elements = driver.find_elements(By.CSS_SELECTOR, selector) 400 | if len(found_elements) > 0: 401 | challenge_found = True 402 | logging.info("Challenge detected. Selector found: " + selector) 403 | break 404 | 405 | attempt = 0 406 | if challenge_found: 407 | while True: 408 | try: 409 | attempt = attempt + 1 410 | # wait until the title changes 411 | for title in CHALLENGE_TITLES: 412 | logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title) 413 | WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title)) 414 | 415 | # then wait until all the selectors disappear 416 | for selector in CHALLENGE_SELECTORS: 417 | logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector) 418 | WebDriverWait(driver, SHORT_TIMEOUT).until_not( 419 | presence_of_element_located((By.CSS_SELECTOR, selector))) 420 | 421 | # all elements not found 422 | break 423 | 424 | except TimeoutException: 425 | logging.debug("Timeout waiting for selector") 426 | 427 | click_verify(driver) 428 | 429 | # update the html (cloudflare reloads the page every 5 s) 430 | html_element = driver.find_element(By.TAG_NAME, "html") 431 | 432 | # waits until cloudflare redirection ends 433 | logging.debug("Waiting for redirect") 434 | # noinspection PyBroadException 435 | try: 436 | WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element)) 437 | except Exception: 438 | logging.debug("Timeout waiting for redirect") 439 | 440 | logging.info("Challenge solved!") 441 | res.message = "Challenge solved!" 442 | else: 443 | logging.info("Challenge not detected!") 444 | res.message = "Challenge not detected!" 
445 | 446 | challenge_res = ChallengeResolutionResultT({}) 447 | challenge_res.url = driver.current_url 448 | challenge_res.status = 200 # todo: fix, selenium not provides this info 449 | challenge_res.cookies = driver.get_cookies() 450 | challenge_res.userAgent = utils.get_user_agent(driver) 451 | 452 | if not req.returnOnlyCookies: 453 | challenge_res.headers = {} # todo: fix, selenium not provides this info 454 | challenge_res.response = driver.page_source 455 | 456 | res.result = challenge_res 457 | return res 458 | 459 | 460 | def _post_request(req: V1RequestBase, driver: WebDriver): 461 | post_form = f'
' 462 | query_string = req.postData if req.postData[0] != '?' else req.postData[1:] 463 | pairs = query_string.split('&') 464 | for pair in pairs: 465 | parts = pair.split('=') 466 | # noinspection PyBroadException 467 | try: 468 | name = unquote(parts[0]) 469 | except Exception: 470 | name = parts[0] 471 | if name == 'submit': 472 | continue 473 | # noinspection PyBroadException 474 | try: 475 | value = unquote(parts[1]) 476 | except Exception: 477 | value = parts[1] 478 | post_form += f'
' 479 | post_form += '
' 480 | html_content = f""" 481 | 482 | 483 | 484 | {post_form} 485 | 486 | 487 | """ 488 | driver.get("data:text/html;charset=utf-8," + html_content) 489 | -------------------------------------------------------------------------------- /src/sessions.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import dataclass 3 | from datetime import datetime, timedelta 4 | from typing import Optional, Tuple 5 | from uuid import uuid1 6 | 7 | from selenium.webdriver.chrome.webdriver import WebDriver 8 | 9 | import utils 10 | from dtos import V1RequestBase 11 | 12 | 13 | @dataclass 14 | class Session: 15 | session_id: str 16 | driver: WebDriver 17 | created_at: datetime 18 | 19 | def lifetime(self) -> timedelta: 20 | return datetime.now() - self.created_at 21 | 22 | 23 | class SessionsStorage: 24 | """SessionsStorage creates, stores and process all the sessions""" 25 | 26 | def __init__(self): 27 | # Self.sessions is a set of dictionaries with the following structure: 28 | self.sessions = {} 29 | self.drivers = {} 30 | self.real_sessions = [] 31 | self.is_being_created = [] 32 | 33 | def create(self, req: V1RequestBase = None, session_id: Optional[str] = None, force_new: Optional[bool] = False) -> Tuple[Session, bool]: 34 | """create creates new instance of WebDriver if necessary, 35 | assign defined (or newly generated) session_id to the instance 36 | and returns the session object. If a new session has been created 37 | second argument is set to True. 38 | 39 | Note: The function is idempotent, so in case if session_id 40 | already exists in the storage a new instance of WebDriver won't be created 41 | and existing session will be returned. Second argument defines if 42 | new session has been created (True) or an existing one was used (False). 
43 | """ 44 | session_id = session_id or str(uuid1()) 45 | 46 | if force_new: 47 | self.destroy(session_id) 48 | 49 | if self.exists(session_id): 50 | return self.sessions[session_id], False 51 | 52 | # Try to add it to the real sessions 53 | if session_id not in self.real_sessions: 54 | self.real_sessions.append(session_id) 55 | self.is_being_created.append(session_id) 56 | else: 57 | if session_id in self.is_being_created: 58 | # Wait for it to be created 59 | while session_id in self.is_being_created: 60 | pass 61 | 62 | if session_id in self.sessions: 63 | return self.sessions[session_id], False 64 | else: 65 | self.is_being_created.append(session_id) 66 | 67 | if req is not None: 68 | driver = utils.get_webdriver(req) 69 | else: 70 | driver = utils.get_webdriver() 71 | 72 | created_at = datetime.now() 73 | session = Session(session_id, driver, created_at) 74 | 75 | self.sessions[session_id] = session 76 | self.drivers[session_id] = driver 77 | 78 | self.is_being_created.remove(session_id) 79 | 80 | return session, True 81 | 82 | def exists(self, session_id: str) -> bool: 83 | return session_id in self.sessions 84 | 85 | def destroy(self, session_id: str) -> bool: 86 | """destroy closes the driver instance and removes session from the storage. 87 | The function is noop if session_id doesn't exist. 88 | The function returns True if session was found and destroyed, 89 | and False if session_id wasn't found. 
90 | """ 91 | if not self.exists(session_id): 92 | return False 93 | 94 | self.sessions.pop(session_id) 95 | 96 | # Check if session_id is in the drivers dict 97 | if session_id in self.drivers: 98 | driver = self.drivers.pop(session_id) 99 | driver.quit() 100 | del driver 101 | 102 | if session_id in self.real_sessions: 103 | self.real_sessions.remove(session_id) 104 | 105 | return True 106 | 107 | def get(self, session_id: str, ttl: Optional[timedelta] = None, req: V1RequestBase = None) -> Tuple[Session, bool]: 108 | session, fresh = self.create(session_id=session_id) 109 | 110 | if ttl is not None and not fresh and session.lifetime() > ttl: 111 | # logging.debug(session\'s lifetime has expired, so the session is recreated (session_id={session_id})') 112 | logging.info(f'Session\'s lifetime has expired, so the session is being recreated (session_id={session_id})') 113 | session, fresh = self.create(req=req, session_id=session_id, force_new=True) 114 | 115 | return session, fresh 116 | 117 | def session_ids(self) -> list[str]: 118 | return list(self.sessions.keys()) 119 | -------------------------------------------------------------------------------- /src/tests.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import threading 4 | import time 5 | 6 | import requests 7 | 8 | def _post_json(json_data): 9 | url = "http://localhost:8192/v1" 10 | 11 | payload = json.dumps(json_data) 12 | headers = { 13 | 'Content-Type': 'application/json' 14 | } 15 | return requests.request("POST", url, headers=headers, data=payload) 16 | 17 | 18 | def test_session_create(): 19 | session_count = 3 20 | for i in range(session_count): 21 | cmd = { 22 | "cmd": "sessions.create", 23 | "session": "1", 24 | "url": "http://www.soleretriever.com", 25 | "maxTimeout": 60000, 26 | "headless": True 27 | } 28 | threading.Thread(target=_post_json, args=(cmd,)).start() 29 | 30 | print(">> Created", session_count, "sessions.") 
31 | 32 | def test_browser_request(): 33 | cmd = { 34 | "cmd": "request.get", 35 | "url": "http://www.google.com", 36 | "session": "1", 37 | "maxTimeout": 60000, 38 | "headless": True, 39 | "returnOnlyCookies": True 40 | } 41 | response = _post_json(cmd) 42 | print(response.text) 43 | 44 | def test_ttl(): 45 | time_now = datetime.datetime.now() 46 | cmd = { 47 | "cmd": "request.get", 48 | "session": "1", 49 | "url":"http://www.google.com", 50 | "session_ttl_minutes": 1, 51 | "maxTimeout": 60000, 52 | "headless": True, 53 | "returnOnlyCookies": True 54 | } 55 | while True: 56 | threading.Thread(target=_post_json, args=(cmd,)).start() 57 | time.sleep(10) 58 | print(">>", (datetime.datetime.now() - time_now).seconds, "seconds elapsed") 59 | if (datetime.datetime.now() - time_now).seconds > 80: 60 | break 61 | 62 | if __name__ == "__main__": 63 | test_session_create() 64 | test_browser_request() 65 | test_ttl() -------------------------------------------------------------------------------- /src/tests_sites.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from webtest import TestApp 4 | 5 | from dtos import V1ResponseBase, STATUS_OK 6 | import flaresolverr 7 | import utils 8 | 9 | 10 | def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None: 11 | for obj in _list: 12 | if obj[key] == value: 13 | return obj 14 | return None 15 | 16 | 17 | def asset_cloudflare_solution(self, res, site_url, site_text): 18 | self.assertEqual(res.status_code, 200) 19 | 20 | body = V1ResponseBase(res.json) 21 | self.assertEqual(STATUS_OK, body.status) 22 | self.assertEqual("Challenge solved!", body.message) 23 | self.assertGreater(body.startTimestamp, 10000) 24 | self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) 25 | self.assertEqual(utils.get_flaresolverr_version(), body.version) 26 | 27 | solution = body.solution 28 | self.assertIn(site_url, solution.url) 29 | self.assertEqual(solution.status, 200) 
30 | self.assertIs(len(solution.headers), 0) 31 | self.assertIn(site_text, solution.response) 32 | self.assertGreater(len(solution.cookies), 0) 33 | self.assertIn("Chrome/", solution.userAgent) 34 | 35 | cf_cookie = _find_obj_by_key("name", "cf_clearance", solution.cookies) 36 | self.assertIsNotNone(cf_cookie, "Cloudflare cookie not found") 37 | self.assertGreater(len(cf_cookie["value"]), 30) 38 | 39 | 40 | class TestFlareSolverr(unittest.TestCase): 41 | app = TestApp(flaresolverr.app) 42 | 43 | def test_v1_endpoint_request_get_cloudflare(self): 44 | sites_get = [ 45 | ('nowsecure', 'https://nowsecure.nl', 'nowSecure'), 46 | ('0magnet', 'https://0magnet.com/search?q=2022', 'Torrent Search - ØMagnet'), 47 | ('1337x', 'https://1337x.unblockit.cat/cat/Movies/time/desc/1/', ''), 48 | ('avistaz', 'https://avistaz.to/api/v1/jackett/torrents?in=1&type=0&search=', 49 | 'Access denied'), 50 | ('badasstorrents', 'https://badasstorrents.com/torrents/search/720p/date/desc', 51 | 'Latest Torrents - BadassTorrents'), 52 | ('bt4g', 'https://bt4g.org/search/2022', 'Download 2022 Torrents - BT4G'), 53 | ('cinemaz', 'https://cinemaz.to/api/v1/jackett/torrents?in=1&type=0&search=', 54 | 'Access denied'), 55 | ('epublibre', 'https://epublibre.unblockit.cat/catalogo/index/0/nuevo/todos/sin/todos/--/ajax', 56 | 'epublibre - catálogo'), 57 | ('ext', 'https://ext.to/latest/?order=age&sort=desc', 58 | 'Download Latest Torrents - EXT Torrents'), 59 | ('extratorrent', 'https://extratorrent.st/search/?srt=added&order=desc&search=720p&new=1&x=0&y=0', 60 | 'Page 1 - ExtraTorrent'), 61 | ('idope', 'https://idope.se/browse.html', 'Recent Torrents'), 62 | ('limetorrents', 'https://limetorrents.unblockninja.com/latest100', 63 | 'Latest 100 torrents - LimeTorrents'), 64 | ('privatehd', 'https://privatehd.to/api/v1/jackett/torrents?in=1&type=0&search=', 65 | 'Access denied'), 66 | ('torrentcore', 'https://torrentcore.xyz/index', 'Torrent[CORE] - Torrent community.'), 67 | ('torrentqq223', 
'https://torrentqq223.com/torrent/newest.html', 'https://torrentqq223.com/ads/'), 68 | ('36dm', 'https://www.36dm.club/1.html', 'https://www.36dm.club/yesterday-1.html'), 69 | ('erai-raws', 'https://www.erai-raws.info/feed/?type=magnet', '403 Forbidden'), 70 | ('teamos', 'https://www.teamos.xyz/torrents/?filename=&freeleech=', 71 | 'Log in | Team OS : Your Only Destination To Custom OS !!'), 72 | ('yts', 'https://yts.unblockninja.com/api/v2/list_movies.json?query_term=&limit=50&sort=date_added', 73 | '{"movie_count":') 74 | ] 75 | for site_name, site_url, site_text in sites_get: 76 | with self.subTest(msg=site_name): 77 | res = self.app.post_json('/v1', { 78 | "cmd": "request.get", 79 | "url": site_url 80 | }) 81 | asset_cloudflare_solution(self, res, site_url, site_text) 82 | 83 | def test_v1_endpoint_request_post_cloudflare(self): 84 | sites_post = [ 85 | ('nnmclub', 'https://nnmclub.to/forum/tracker.php', 'Трекер :: NNM-Club', 86 | 'prev_sd=0&prev_a=0&prev_my=0&prev_n=0&prev_shc=0&prev_shf=1&prev_sha=1&prev_shs=0&prev_shr=0&prev_sht=0&f%5B%5D=-1&o=1&s=2&tm=-1&shf=1&sha=1&ta=-1&sns=-1&sds=-1&nm=&pn=&submit=%CF%EE%E8%F1%EA') 87 | ] 88 | 89 | for site_name, site_url, site_text, post_data in sites_post: 90 | with self.subTest(msg=site_name): 91 | res = self.app.post_json('/v1', { 92 | "cmd": "request.post", 93 | "url": site_url, 94 | "postData": post_data 95 | }) 96 | asset_cloudflare_solution(self, res, site_url, site_text) 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /src/undetected_chromedriver/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | 5 | 888 888 d8b 6 | 888 888 Y8P 7 | 888 888 8 | .d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 
888d888 9 | d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P" 10 | 888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888 11 | Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888 12 | "Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888 13 | 14 | by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) 15 | 16 | """ 17 | from __future__ import annotations 18 | 19 | __version__ = "3.5.3" 20 | 21 | import json 22 | import logging 23 | import os 24 | import pathlib 25 | import re 26 | import shutil 27 | import subprocess 28 | import sys 29 | import tempfile 30 | import time 31 | from weakref import finalize 32 | 33 | import selenium.webdriver.chrome.service 34 | import selenium.webdriver.chrome.webdriver 35 | from selenium.webdriver.common.by import By 36 | import selenium.webdriver.chromium.service 37 | import selenium.webdriver.remote.command 38 | import selenium.webdriver.remote.webdriver 39 | 40 | from .cdp import CDP 41 | from .dprocess import start_detached 42 | from .options import ChromeOptions 43 | from .patcher import IS_POSIX 44 | from .patcher import Patcher 45 | from .reactor import Reactor 46 | from .webelement import UCWebElement 47 | from .webelement import WebElement 48 | 49 | __all__ = ( 50 | "Chrome", 51 | "ChromeOptions", 52 | "Patcher", 53 | "Reactor", 54 | "CDP", 55 | "find_chrome_executable", 56 | ) 57 | 58 | logger = logging.getLogger("uc") 59 | logger.setLevel(logging.getLogger().getEffectiveLevel()) 60 | 61 | 62 | class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): 63 | """ 64 | 65 | Controls the ChromeDriver and allows you to drive the browser. 66 | 67 | The webdriver file will be downloaded by this module automatically, 68 | you do not need to specify this. however, you may if you wish. 
69 | 70 | Attributes 71 | ---------- 72 | 73 | Methods 74 | ------- 75 | 76 | reconnect() 77 | 78 | this can be useful in case of heavy detection methods 79 | -stops the chromedriver service which runs in the background 80 | -starts the chromedriver service which runs in the background 81 | -recreate session 82 | 83 | 84 | start_session(capabilities=None, browser_profile=None) 85 | 86 | differentiates from the regular method in that it does not 87 | require a capabilities argument. The capabilities are automatically 88 | recreated from the options at creation time. 89 | 90 | -------------------------------------------------------------------------- 91 | NOTE: 92 | Chrome has everything included to work out of the box. 93 | it does not `need` customizations. 94 | any customizations MAY lead to trigger bot migitation systems. 95 | 96 | -------------------------------------------------------------------------- 97 | """ 98 | 99 | _instances = set() 100 | session_id = None 101 | debug = False 102 | 103 | def __init__( 104 | self, 105 | options=None, 106 | user_data_dir=None, 107 | driver_executable_path=None, 108 | browser_executable_path=None, 109 | port=0, 110 | enable_cdp_events=False, 111 | # service_args=None, 112 | # service_creationflags=None, 113 | desired_capabilities=None, 114 | advanced_elements=False, 115 | # service_log_path=None, 116 | keep_alive=True, 117 | log_level=0, 118 | headless=False, 119 | version_main=None, 120 | patcher_force_close=False, 121 | suppress_welcome=True, 122 | use_subprocess=True, 123 | debug=False, 124 | no_sandbox=True, 125 | user_multi_procs: bool = False, 126 | **kw, 127 | ): 128 | """ 129 | Creates a new instance of the chrome driver. 130 | 131 | Starts the service and then creates new instance of chrome driver. 132 | 133 | Parameters 134 | ---------- 135 | 136 | options: ChromeOptions, optional, default: None - automatic useful defaults 137 | this takes an instance of ChromeOptions, mainly to customize browser behavior. 
138 | anything other than the default, for example extensions or startup options 139 | are not supported in case of failure, and can probably lower your undetectability. 140 | 141 | 142 | user_data_dir: str , optional, default: None (creates temp profile) 143 | if user_data_dir is a path to a valid chrome profile directory, use it, 144 | and turn off automatic removal mechanism at exit. 145 | 146 | driver_executable_path: str, optional, default: None(=downloads and patches new binary) 147 | 148 | browser_executable_path: str, optional, default: None - use find_chrome_executable 149 | Path to the browser executable. 150 | If not specified, make sure the executable's folder is in $PATH 151 | 152 | port: int, optional, default: 0 153 | port to be used by the chromedriver executable, this is NOT the debugger port. 154 | leave it at 0 unless you know what you are doing. 155 | the default value of 0 automatically picks an available port. 156 | 157 | enable_cdp_events: bool, default: False 158 | :: currently for chrome only 159 | this enables the handling of wire messages 160 | when enabled, you can subscribe to CDP events by using: 161 | 162 | driver.add_cdp_listener("Network.dataReceived", yourcallback) 163 | # yourcallback is a callable which accepts exactly 1 dict as parameter 164 | 165 | 166 | service_args: list of str, optional, default: None 167 | arguments to pass to the driver service 168 | 169 | desired_capabilities: dict, optional, default: None - auto from config 170 | Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref". 
171 | 172 | advanced_elements: bool, optional, default: False 173 | makes it easier to recognize elements like you know them from html/browser inspection, especially when working 174 | in an interactive environment 175 | 176 | default webelement repr: 177 | 178 | 179 | advanced webelement repr 180 | )> 181 | 182 | note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and printing them, it does take a little more time. 183 | 184 | 185 | service_log_path: str, optional, default: None 186 | path to log information from the driver. 187 | 188 | keep_alive: bool, optional, default: True 189 | Whether to configure ChromeRemoteConnection to use HTTP keep-alive. 190 | 191 | log_level: int, optional, default: adapts to python global log level 192 | 193 | headless: bool, optional, default: False 194 | can also be specified in the options instance. 195 | Specify whether you want to use the browser in headless mode. 196 | warning: this lowers undetectability and is not fully supported. 197 | 198 | version_main: int, optional, default: None (=auto) 199 | if you, for god knows whatever reason, use 200 | an older version of Chrome. You can specify its full rounded version number 201 | here. Example: 87 for all versions of 87 202 | 203 | patcher_force_close: bool, optional, default: False 204 | instructs the patcher to do whatever it can to access the chromedriver binary 205 | if the file is locked, it will force shutdown all instances. 206 | setting it is not recommended, unless you know the implications and think 207 | you might need it. 208 | 209 | suppress_welcome: bool, optional , default: True 210 | a "welcome" alert might show up on *nix-like systems asking whether you want to set 211 | chrome as your default browser, and if you want to send even more data to google. 212 | now, in case you are a nag-fetishist, or a diagnostics data feeder to google, you can set this to False. 
213 | Note: if you don't handle the nag screen in time, the browser loses its connection and throws an Exception. 214 | 215 | use_subprocess: bool, optional , default: True, 216 | 217 | False makes sure Chrome will get its own process (so no subprocess of chromedriver.exe or python) 218 | This fixes a LOT of issues, like multithreaded run, but most importantly, shutting down correctly after 219 | program exits or using .quit() 220 | you should know what you're doing, and know how python works. 221 | 222 | unfortunately, there is always an edge case in which one would like to write a single script with the only contents being: 223 | --start script-- 224 | import undetected_chromedriver as uc 225 | d = uc.Chrome() 226 | d.get('https://somesite/') 227 | ---end script -- 228 | 229 | and will be greeted with an error, since the program exits before chrome has a chance to launch. 230 | in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of the time. 231 | ! setting it to True comes with NO support when being detected. ! 232 | 233 | no_sandbox: bool, optional, default=True 234 | uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar 235 | this option has a default of True since many people seem to run this as root (....) , and chrome does not start 236 | when running as root without using --no-sandbox flag. 237 | 238 | user_multi_procs: 239 | set to true when you are using multithreads/multiprocessing 240 | ensures not all processes are trying to modify a binary which is in use by another. 241 | for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER. 242 | this requirement can be easily satisfied, by just running this program "normally" and closing/killing it. 
243 | 244 | 245 | """ 246 | 247 | finalize(self, self._ensure_close, self) 248 | self.debug = debug 249 | self.patcher = Patcher( 250 | executable_path=driver_executable_path, 251 | force=patcher_force_close, 252 | version_main=version_main, 253 | user_multi_procs=user_multi_procs, 254 | ) 255 | # self.patcher.auto(user_multiprocess = user_multi_num_procs) 256 | self.patcher.auto() 257 | 258 | # self.patcher = patcher 259 | if not options: 260 | options = ChromeOptions() 261 | 262 | try: 263 | if hasattr(options, "_session") and options._session is not None: 264 | # prevent reuse of options, 265 | # as it just appends arguments, not replace them 266 | # you'll get conflicts starting chrome 267 | raise RuntimeError("you cannot reuse the ChromeOptions object") 268 | except AttributeError: 269 | pass 270 | 271 | options._session = self 272 | 273 | if not options.debugger_address: 274 | debug_port = ( 275 | port 276 | if port != 0 277 | else selenium.webdriver.common.service.utils.free_port() 278 | ) 279 | debug_host = "127.0.0.1" 280 | options.debugger_address = "%s:%d" % (debug_host, debug_port) 281 | else: 282 | debug_host, debug_port = options.debugger_address.split(":") 283 | debug_port = int(debug_port) 284 | 285 | if enable_cdp_events: 286 | options.set_capability( 287 | "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"} 288 | ) 289 | 290 | options.add_argument("--remote-debugging-host=%s" % debug_host) 291 | options.add_argument("--remote-debugging-port=%s" % debug_port) 292 | 293 | if user_data_dir: 294 | options.add_argument("--user-data-dir=%s" % user_data_dir) 295 | 296 | language, keep_user_data_dir = None, bool(user_data_dir) 297 | 298 | # see if a custom user profile is specified in options 299 | for arg in options.arguments: 300 | 301 | if any([_ in arg for _ in ("--headless", "headless")]): 302 | options.arguments.remove(arg) 303 | options.headless = True 304 | 305 | if "lang" in arg: 306 | m = re.search("(?:--)?lang(?:[ =])?(.*)", arg) 307 
| try: 308 | language = m[1] 309 | except IndexError: 310 | logger.debug("will set the language to en-US,en;q=0.9") 311 | language = "en-US,en;q=0.9" 312 | 313 | if "user-data-dir" in arg: 314 | m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg) 315 | try: 316 | user_data_dir = m[1] 317 | logger.debug( 318 | "user-data-dir found in user argument %s => %s" % (arg, m[1]) 319 | ) 320 | keep_user_data_dir = True 321 | 322 | except IndexError: 323 | logger.debug( 324 | "no user data dir could be extracted from supplied argument %s " 325 | % arg 326 | ) 327 | 328 | if not user_data_dir: 329 | # backward compatiblity 330 | # check if an old uc.ChromeOptions is used, and extract the user data dir 331 | 332 | if hasattr(options, "user_data_dir") and getattr( 333 | options, "user_data_dir", None 334 | ): 335 | import warnings 336 | 337 | warnings.warn( 338 | "using ChromeOptions.user_data_dir might stop working in future versions." 339 | "use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder" 340 | ) 341 | options.add_argument("--user-data-dir=%s" % options.user_data_dir) 342 | keep_user_data_dir = True 343 | logger.debug( 344 | "user_data_dir property found in options object: %s" % user_data_dir 345 | ) 346 | 347 | else: 348 | user_data_dir = os.path.normpath(tempfile.mkdtemp()) 349 | keep_user_data_dir = False 350 | arg = "--user-data-dir=%s" % user_data_dir 351 | options.add_argument(arg) 352 | logger.debug( 353 | "created a temporary folder in which the user-data (profile) will be stored during this\n" 354 | "session, and added it to chrome startup arguments: %s" % arg 355 | ) 356 | 357 | if not language: 358 | try: 359 | import locale 360 | 361 | language = locale.getdefaultlocale()[0].replace("_", "-") 362 | except Exception: 363 | pass 364 | if not language: 365 | language = "en-US" 366 | 367 | options.add_argument("--lang=%s" % language) 368 | 369 | if not options.binary_location: 370 | options.binary_location = ( 371 | 
browser_executable_path or find_chrome_executable() 372 | ) 373 | 374 | if not options.binary_location or not \ 375 | pathlib.Path(options.binary_location).exists(): 376 | raise FileNotFoundError( 377 | "\n---------------------\n" 378 | "Could not determine browser executable." 379 | "\n---------------------\n" 380 | "Make sure your browser is installed in the default location (path).\n" 381 | "If you are sure about the browser executable, you can specify it using\n" 382 | "the `browser_executable_path='{}` parameter.\n\n" 383 | .format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe") 384 | ) 385 | 386 | self._delay = 3 387 | 388 | self.user_data_dir = user_data_dir 389 | self.keep_user_data_dir = keep_user_data_dir 390 | 391 | if suppress_welcome: 392 | options.arguments.extend(["--no-default-browser-check", "--no-first-run"]) 393 | if no_sandbox: 394 | options.arguments.extend(["--no-sandbox", "--test-type"]) 395 | 396 | if headless or options.headless: 397 | # workaround until a better checking is found 398 | try: 399 | if self.patcher.version_main < 108: 400 | options.add_argument("--headless=chrome") 401 | elif self.patcher.version_main >= 108: 402 | options.add_argument("--headless=new") 403 | except: 404 | logger.warning("could not detect version_main." 
405 | "therefore, we are assuming it is chrome 108 or higher") 406 | options.add_argument("--headless=new") 407 | 408 | options.add_argument("--window-size=1920,1080") 409 | options.add_argument("--start-maximized") 410 | options.add_argument("--no-sandbox") 411 | # fixes "could not connect to chrome" error when running 412 | # on linux using privileged user like root (which i don't recommend) 413 | 414 | options.add_argument( 415 | "--log-level=%d" % log_level 416 | or divmod(logging.getLogger().getEffectiveLevel(), 10)[0] 417 | ) 418 | 419 | if hasattr(options, "handle_prefs"): 420 | options.handle_prefs(user_data_dir) 421 | 422 | # fix exit_type flag to prevent tab-restore nag 423 | try: 424 | with open( 425 | os.path.join(user_data_dir, "Default/Preferences"), 426 | encoding="latin1", 427 | mode="r+", 428 | ) as fs: 429 | config = json.load(fs) 430 | if config["profile"]["exit_type"] is not None: 431 | # fixing the restore-tabs-nag 432 | config["profile"]["exit_type"] = None 433 | fs.seek(0, 0) 434 | json.dump(config, fs) 435 | fs.truncate() # the file might be shorter 436 | logger.debug("fixed exit_type flag") 437 | except Exception as e: 438 | logger.debug("did not find a bad exit_type flag ") 439 | 440 | self.options = options 441 | 442 | if not desired_capabilities: 443 | desired_capabilities = options.to_capabilities() 444 | 445 | if not use_subprocess: 446 | self.browser_pid = start_detached( 447 | options.binary_location, *options.arguments 448 | ) 449 | else: 450 | browser = subprocess.Popen( 451 | [options.binary_location, *options.arguments], 452 | stdin=subprocess.PIPE, 453 | stdout=subprocess.PIPE, 454 | stderr=subprocess.PIPE, 455 | close_fds=IS_POSIX, 456 | ) 457 | self.browser_pid = browser.pid 458 | 459 | service = selenium.webdriver.chromium.service.ChromiumService( 460 | self.patcher.executable_path, 461 | start_error_message="null" 462 | ) 463 | 464 | super(Chrome, self).__init__( 465 | service=service, 466 | options=options, 467 | 
keep_alive=keep_alive, 468 | ) 469 | 470 | self.reactor = None 471 | 472 | if enable_cdp_events: 473 | if logging.getLogger().getEffectiveLevel() == logging.DEBUG: 474 | logging.getLogger( 475 | "selenium.webdriver.remote.remote_connection" 476 | ).setLevel(20) 477 | reactor = Reactor(self) 478 | reactor.start() 479 | self.reactor = reactor 480 | 481 | if advanced_elements: 482 | self._web_element_cls = UCWebElement 483 | else: 484 | self._web_element_cls = WebElement 485 | 486 | if options.headless: 487 | self._configure_headless() 488 | 489 | def _configure_headless(self): 490 | orig_get = self.get 491 | logger.info("setting properties for headless") 492 | 493 | def get_wrapped(*args, **kwargs): 494 | if self.execute_script("return navigator.webdriver"): 495 | logger.info("patch navigator.webdriver") 496 | self.execute_cdp_cmd( 497 | "Page.addScriptToEvaluateOnNewDocument", 498 | { 499 | "source": """ 500 | 501 | Object.defineProperty(window, "navigator", { 502 | Object.defineProperty(window, "navigator", { 503 | value: new Proxy(navigator, { 504 | has: (target, key) => (key === "webdriver" ? false : key in target), 505 | get: (target, key) => 506 | key === "webdriver" 507 | ? false 508 | : typeof target[key] === "function" 509 | ? 
target[key].bind(target) 510 | : target[key], 511 | }), 512 | }); 513 | """ 514 | }, 515 | ) 516 | 517 | logger.info("patch user-agent string") 518 | self.execute_cdp_cmd( 519 | "Network.setUserAgentOverride", 520 | { 521 | "userAgent": self.execute_script( 522 | "return navigator.userAgent" 523 | ).replace("Headless", "") 524 | }, 525 | ) 526 | self.execute_cdp_cmd( 527 | "Page.addScriptToEvaluateOnNewDocument", 528 | { 529 | "source": """ 530 | Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1}); 531 | Object.defineProperty(navigator.connection, 'rtt', {get: () => 100}); 532 | 533 | // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js 534 | window.chrome = { 535 | app: { 536 | isInstalled: false, 537 | InstallState: { 538 | DISABLED: 'disabled', 539 | INSTALLED: 'installed', 540 | NOT_INSTALLED: 'not_installed' 541 | }, 542 | RunningState: { 543 | CANNOT_RUN: 'cannot_run', 544 | READY_TO_RUN: 'ready_to_run', 545 | RUNNING: 'running' 546 | } 547 | }, 548 | runtime: { 549 | OnInstalledReason: { 550 | CHROME_UPDATE: 'chrome_update', 551 | INSTALL: 'install', 552 | SHARED_MODULE_UPDATE: 'shared_module_update', 553 | UPDATE: 'update' 554 | }, 555 | OnRestartRequiredReason: { 556 | APP_UPDATE: 'app_update', 557 | OS_UPDATE: 'os_update', 558 | PERIODIC: 'periodic' 559 | }, 560 | PlatformArch: { 561 | ARM: 'arm', 562 | ARM64: 'arm64', 563 | MIPS: 'mips', 564 | MIPS64: 'mips64', 565 | X86_32: 'x86-32', 566 | X86_64: 'x86-64' 567 | }, 568 | PlatformNaclArch: { 569 | ARM: 'arm', 570 | MIPS: 'mips', 571 | MIPS64: 'mips64', 572 | X86_32: 'x86-32', 573 | X86_64: 'x86-64' 574 | }, 575 | PlatformOs: { 576 | ANDROID: 'android', 577 | CROS: 'cros', 578 | LINUX: 'linux', 579 | MAC: 'mac', 580 | OPENBSD: 'openbsd', 581 | WIN: 'win' 582 | }, 583 | RequestUpdateCheckStatus: { 584 | NO_UPDATE: 'no_update', 585 | THROTTLED: 'throttled', 586 | UPDATE_AVAILABLE: 'update_available' 587 | } 588 | } 589 | } 590 | 591 | // 
https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js 592 | if (!window.Notification) { 593 | window.Notification = { 594 | permission: 'denied' 595 | } 596 | } 597 | 598 | const originalQuery = window.navigator.permissions.query 599 | window.navigator.permissions.__proto__.query = parameters => 600 | parameters.name === 'notifications' 601 | ? Promise.resolve({ state: window.Notification.permission }) 602 | : originalQuery(parameters) 603 | 604 | const oldCall = Function.prototype.call 605 | function call() { 606 | return oldCall.apply(this, arguments) 607 | } 608 | Function.prototype.call = call 609 | 610 | const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString') 611 | const oldToString = Function.prototype.toString 612 | 613 | function functionToString() { 614 | if (this === window.navigator.permissions.query) { 615 | return 'function query() { [native code] }' 616 | } 617 | if (this === functionToString) { 618 | return nativeToStringFunctionString 619 | } 620 | return oldCall.call(oldToString, this) 621 | } 622 | // eslint-disable-next-line 623 | Function.prototype.toString = functionToString 624 | """ 625 | }, 626 | ) 627 | return orig_get(*args, **kwargs) 628 | 629 | self.get = get_wrapped 630 | 631 | # def _get_cdc_props(self): 632 | # return self.execute_script( 633 | # """ 634 | # let objectToInspect = window, 635 | # result = []; 636 | # while(objectToInspect !== null) 637 | # { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); 638 | # objectToInspect = Object.getPrototypeOf(objectToInspect); } 639 | # 640 | # return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)) 641 | # """ 642 | # ) 643 | # 644 | # def _hook_remove_cdc_props(self): 645 | # self.execute_cdp_cmd( 646 | # "Page.addScriptToEvaluateOnNewDocument", 647 | # { 648 | # "source": """ 649 | # let objectToInspect = window, 650 | # result = []; 651 | # while(objectToInspect !== 
null) 652 | # { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); 653 | # objectToInspect = Object.getPrototypeOf(objectToInspect); } 654 | # result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig) 655 | # &&delete window[p]&&console.log('removed',p)) 656 | # """ 657 | # }, 658 | # ) 659 | 660 | def get(self, url): 661 | # if self._get_cdc_props(): 662 | # self._hook_remove_cdc_props() 663 | return super().get(url) 664 | 665 | def add_cdp_listener(self, event_name, callback): 666 | if ( 667 | self.reactor 668 | and self.reactor is not None 669 | and isinstance(self.reactor, Reactor) 670 | ): 671 | self.reactor.add_event_handler(event_name, callback) 672 | return self.reactor.handlers 673 | return False 674 | 675 | def clear_cdp_listeners(self): 676 | if self.reactor and isinstance(self.reactor, Reactor): 677 | self.reactor.handlers.clear() 678 | 679 | def window_new(self): 680 | self.execute( 681 | selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"} 682 | ) 683 | 684 | def tab_new(self, url: str): 685 | """ 686 | this opens a url in a new tab. 687 | apparently, that passes all tests directly! 
688 | 689 | Parameters 690 | ---------- 691 | url 692 | 693 | Returns 694 | ------- 695 | 696 | """ 697 | if not hasattr(self, "cdp"): 698 | from .cdp import CDP 699 | 700 | cdp = CDP(self.options) 701 | cdp.tab_new(url) 702 | 703 | def reconnect(self, timeout=0.1): 704 | try: 705 | self.service.stop() 706 | except Exception as e: 707 | logger.debug(e) 708 | time.sleep(timeout) 709 | try: 710 | self.service.start() 711 | except Exception as e: 712 | logger.debug(e) 713 | 714 | try: 715 | self.start_session() 716 | except Exception as e: 717 | logger.debug(e) 718 | 719 | def start_session(self, capabilities=None, browser_profile=None): 720 | if not capabilities: 721 | capabilities = self.options.to_capabilities() 722 | super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( 723 | capabilities 724 | ) 725 | # super(Chrome, self).start_session(capabilities, browser_profile) 726 | 727 | def find_elements_recursive(self, by, value): 728 | """ 729 | find elements in all frames 730 | this is a generator function, which is needed 731 | since if it would return a list of elements, they 732 | will be stale on arrival. 
733 | using generator, when the element is returned we are in the correct frame 734 | to use it directly 735 | Args: 736 | by: By 737 | value: str 738 | Returns: Generator[webelement.WebElement] 739 | """ 740 | 741 | def search_frame(f=None): 742 | if not f: 743 | # ensure we are on main content frame 744 | self.switch_to.default_content() 745 | else: 746 | self.switch_to.frame(f) 747 | for elem in self.find_elements(by, value): 748 | yield elem 749 | # switch back to main content, otherwise we will get StaleElementReferenceException 750 | self.switch_to.default_content() 751 | 752 | # search root frame 753 | for elem in search_frame(): 754 | yield elem 755 | # get iframes 756 | frames = self.find_elements('css selector', 'iframe') 757 | 758 | # search per frame 759 | for f in frames: 760 | for elem in search_frame(f): 761 | yield elem 762 | 763 | def quit(self): 764 | try: 765 | self.service.process.kill() 766 | logger.debug("webdriver process ended") 767 | except (AttributeError, RuntimeError, OSError): 768 | pass 769 | try: 770 | self.reactor.event.set() 771 | logger.debug("shutting down reactor") 772 | except AttributeError: 773 | pass 774 | try: 775 | os.kill(self.browser_pid, 15) 776 | logger.debug("gracefully closed browser") 777 | except Exception as e: # noqa 778 | pass 779 | if ( 780 | hasattr(self, "keep_user_data_dir") 781 | and hasattr(self, "user_data_dir") 782 | and not self.keep_user_data_dir 783 | ): 784 | for _ in range(5): 785 | try: 786 | shutil.rmtree(self.user_data_dir, ignore_errors=False) 787 | except FileNotFoundError: 788 | pass 789 | except (RuntimeError, OSError, PermissionError) as e: 790 | logger.debug( 791 | "When removing the temp profile, a %s occured: %s\nretrying..." 792 | % (e.__class__.__name__, e) 793 | ) 794 | else: 795 | logger.debug("successfully removed %s" % self.user_data_dir) 796 | break 797 | time.sleep(0.1) 798 | 799 | # dereference patcher, so patcher can start cleaning up as well. 
800 | # this must come last, otherwise it will throw 'in use' errors 801 | self.patcher = None 802 | 803 | def __getattribute__(self, item): 804 | if not super().__getattribute__("debug"): 805 | return super().__getattribute__(item) 806 | else: 807 | import inspect 808 | 809 | original = super().__getattribute__(item) 810 | if inspect.ismethod(original) and not inspect.isclass(original): 811 | def newfunc(*args, **kwargs): 812 | logger.debug( 813 | "calling %s with args %s and kwargs %s\n" 814 | % (original.__qualname__, args, kwargs) 815 | ) 816 | return original(*args, **kwargs) 817 | 818 | return newfunc 819 | return original 820 | 821 | def __enter__(self): 822 | return self 823 | 824 | def __exit__(self, exc_type, exc_val, exc_tb): 825 | self.service.stop() 826 | time.sleep(self._delay) 827 | self.service.start() 828 | self.start_session() 829 | 830 | def __hash__(self): 831 | return hash(self.options.debugger_address) 832 | 833 | def __dir__(self): 834 | return object.__dir__(self) 835 | 836 | def __del__(self): 837 | try: 838 | self.service.process.kill() 839 | except: # noqa 840 | pass 841 | self.quit() 842 | 843 | @classmethod 844 | def _ensure_close(cls, self): 845 | # needs to be a classmethod so finalize can find the reference 846 | logger.info("ensuring close") 847 | if ( 848 | hasattr(self, "service") 849 | and hasattr(self.service, "process") 850 | and hasattr(self.service.process, "kill") 851 | ): 852 | self.service.process.kill() 853 | 854 | 855 | def find_chrome_executable(): 856 | """ 857 | Finds the chrome, chrome beta, chrome canary, chromium executable 858 | 859 | Returns 860 | ------- 861 | executable_path : str 862 | the full file path to found executable 863 | 864 | """ 865 | candidates = set() 866 | if IS_POSIX: 867 | for item in os.environ.get("PATH").split(os.pathsep): 868 | for subitem in ( 869 | "google-chrome", 870 | "chromium", 871 | "chromium-browser", 872 | "chrome", 873 | "google-chrome-stable", 874 | ): 875 | 
class CDPObject(dict):
    """
    A dict whose keys can also be read and written as attributes.

    Nested dicts, and dicts found directly inside list values, are converted
    to CDPObject as well.
    """

    def __init__(self, *a, **k):
        super().__init__(*a, **k)
        # share storage between items and attributes
        self.__dict__ = self
        # only values of existing keys are replaced, so the dict size does not
        # change while iterating
        for key, value in self.items():
            if isinstance(value, dict):
                self[key] = CDPObject(value)
            elif isinstance(value, list):
                for index, item in enumerate(value):
                    if isinstance(item, dict):
                        # wrap the list item itself, not the parent object
                        value[index] = CDPObject(item)

    def __repr__(self):
        tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
        return tpl.format("\n ".join(f"{k} = {v}" for k, v in self.items()))
def get(self, uri):
    """
    Send a GET request to ``self.server_addr + uri``.

    The response object is always stored in ``self._last_resp``. When its
    body decodes as JSON, the decoded value is stored in ``self._last_json``
    and returned; otherwise ``None`` is returned and ``self._last_json`` is
    left untouched.
    """
    response = self._session.get(self.server_addr + uri)
    self._last_resp = response
    try:
        payload = response.json()
    except Exception:
        return None
    self._last_json = payload
    return payload
class Structure(dict):
    """
    This is a dict-like object structure, which you should subclass.

    Keys are also reachable as attributes. Mappings passed to the constructor
    (directly, or as items of a sequence value) are converted to instances of
    the same class.
    """

    _store = {}

    def __init__(self, *a, **kw):
        """
        Instantiate a new instance.

        :param a: positional arguments, as accepted by ``dict``
        :param kw: keyword arguments, as accepted by ``dict``
        """

        super().__init__()

        cls = self.__class__
        # auxiliary dict: lets dict() do the argument parsing
        d = dict(*a, **kw)
        for k, v in d.items():
            if isinstance(v, Mapping):
                self[k] = cls(v)
            elif isinstance(v, Sequence) and not isinstance(v, (str, bytes)):
                # only mappings can be wrapped; scalars inside a sequence
                # (strings, numbers, ...) are kept as they are
                self[k] = [cls(i) if isinstance(i, Mapping) else i for i in v]
            else:
                self[k] = v
        # share storage between items and attributes
        super().__setattr__("__dict__", self)

    def __getattr__(self, item):
        # only reached when normal lookup failed, so the name does not exist
        raise AttributeError(
            "%r object has no attribute %r" % (self.__class__.__name__, item)
        )

    def __getitem__(self, item):
        return super().__getitem__(item)

    def __setattr__(self, key, value):
        self.__setitem__(key, value)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)

    def update(self, *a, **kw):
        super().update(*a, **kw)

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            # let Python try the reflected comparison instead of crashing
            return NotImplemented
        # plain dict comparison also works when values are unhashable (lists)
        return dict(self) == dict(other)

    def __hash__(self):
        # raises TypeError when a value is unhashable, e.g. a list
        return hash(frozenset(self.items()))

    @classmethod
    def __init_subclass__(cls, **kwargs):
        # every subclass gets its own store instead of sharing the base one
        cls._store = {}

    def _normalize_strings(self):
        """Strip leading/trailing whitespace from all top-level string values."""
        for k, v in self.copy().items():
            if isinstance(v, str):
                self[k] = v.strip()
| stop_event: threading.Event, 113 | on_event_coro: Optional[Callable[[List[str]], Awaitable[Any]]] = None, 114 | listen_events: Sequence = ("browser", "network", "performance"), 115 | ): 116 | def threaded(driver, stop_event, on_event_coro): 117 | async def _ensure_service_started(): 118 | while ( 119 | getattr(driver, "service", False) 120 | and getattr(driver.service, "process", False) 121 | and driver.service.process.poll() 122 | ): 123 | print("waiting for driver service to come back on") 124 | await asyncio.sleep(0.05) 125 | # await asyncio.sleep(driver._delay or .25) 126 | 127 | async def get_log_lines(typ): 128 | await _ensure_service_started() 129 | return driver.get_log(typ) 130 | 131 | async def looper(): 132 | while not stop_event.is_set(): 133 | log_lines = [] 134 | try: 135 | for _ in listen_events: 136 | try: 137 | log_lines += await get_log_lines(_) 138 | except: 139 | if logging.getLogger().getEffectiveLevel() <= 10: 140 | traceback.print_exc() 141 | continue 142 | if log_lines and on_event_coro: 143 | await on_event_coro(log_lines) 144 | except Exception as e: 145 | if logging.getLogger().getEffectiveLevel() <= 10: 146 | traceback.print_exc() 147 | 148 | loop = asyncio.new_event_loop() 149 | asyncio.set_event_loop(loop) 150 | loop.run_until_complete(looper()) 151 | 152 | t = threading.Thread(target=threaded, args=(driver, stop_event, on_event_coro)) 153 | t.start() 154 | 155 | async def on_event(data): 156 | print("on_event") 157 | print("data:", data) 158 | 159 | def func_called(fn): 160 | def wrapped(*args, **kwargs): 161 | print( 162 | "func called! %s (args: %s, kwargs: %s)" % (fn.__name__, args, kwargs) 163 | ) 164 | while driver.service.process and driver.service.process.poll() is not None: 165 | time.sleep(0.1) 166 | res = fn(*args, **kwargs) 167 | print("func completed! 
(result: %s)" % res) 168 | return res 169 | 170 | return wrapped 171 | 172 | logging.basicConfig(level=10) 173 | 174 | options = uc.ChromeOptions() 175 | options.set_capability( 176 | "goog:loggingPrefs", {"performance": "ALL", "browser": "ALL", "network": "ALL"} 177 | ) 178 | 179 | driver = uc.Chrome(version_main=96, options=options) 180 | 181 | # driver.command_executor._request = timeout(seconds=1)(driver.command_executor._request) 182 | driver.command_executor._request = func_called(driver.command_executor._request) 183 | collector_stop = threading.Event() 184 | collector(driver, collector_stop, on_event) 185 | 186 | driver.get("https://nowsecure.nl") 187 | 188 | time.sleep(10) 189 | 190 | driver.quit() 191 | -------------------------------------------------------------------------------- /src/undetected_chromedriver/dprocess.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import logging 3 | import multiprocessing 4 | import os 5 | import platform 6 | import signal 7 | from subprocess import PIPE 8 | from subprocess import Popen 9 | import sys 10 | 11 | 12 | CREATE_NEW_PROCESS_GROUP = 0x00000200 13 | DETACHED_PROCESS = 0x00000008 14 | 15 | REGISTERED = [] 16 | 17 | 18 | def start_detached(executable, *args): 19 | """ 20 | Starts a fully independent subprocess (with no parent) 21 | :param executable: executable 22 | :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...] 
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
    """
    Launch ``executable`` in its own session / process group, report its pid
    through ``writer`` and terminate the calling process.

    Runs inside the short-lived helper process created by ``start_detached``.

    :param executable: executable to launch
    :param args: arguments to the executable
    :param writer: sending end of a pipe; receives the pid of the new process
    :raises ValueError: when no writer is given
    """
    if writer is None:
        # fail before launching anything: without a writer the pid could not
        # be reported and the new process would be orphaned
        raise ValueError("writer is required to report the pid")

    # configure launch
    kwargs = {}
    if platform.system() == "Windows":
        kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP)
    else:
        # the [executable, *args] unpacking below needs Python 3.5+, so
        # start_new_session is always available here
        kwargs.update(start_new_session=True)

    # run
    p = Popen([executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **kwargs)

    # send pid to pipe
    writer.send(p.pid)
    sys.exit()
def handle_prefs(self, user_data_dir):
    """
    Write the "prefs" experimental option into the profile's Preferences file.

    Dotted keys are expanded into nested dicts and merged over whatever the
    ``Default/Preferences`` file already contains. Afterwards "prefs" is
    removed from the experimental options.

    Does nothing when no prefs are set.

    Parameters
    ----------
    user_data_dir: str
        profile folder; falls back to ``self._user_data_dir`` when empty
    """
    prefs = self.experimental_options.get("prefs")
    if not prefs:
        return

    user_data_dir = user_data_dir or self._user_data_dir
    default_path = os.path.join(user_data_dir, "Default")
    os.makedirs(default_path, exist_ok=True)

    # undot prefs dict keys
    undot_prefs = {}
    for key, value in prefs.items():
        undot_prefs = self._merge_nested(undot_prefs, self._undot_key(key, value))

    prefs_file = os.path.join(default_path, "Preferences")
    if os.path.exists(prefs_file):
        # read as UTF-8: decoding as latin1 turns every non-ASCII character
        # into mojibake, which json.dump would then write back for good
        with open(prefs_file, encoding="utf-8", mode="r") as f:
            undot_prefs = self._merge_nested(json.load(f), undot_prefs)

    with open(prefs_file, encoding="utf-8", mode="w") as f:
        json.dump(undot_prefs, f)

    # remove the experimental_options to avoid an error
    del self._experimental_options["prefs"]
def __init__(
    self,
    executable_path=None,
    force=False,
    version_main: int = 0,
    user_multi_procs=False,
):
    """
    Args:
        executable_path: None = automatic
                         a full file path to the chromedriver executable
        force: False
                terminate processes which are holding lock
        version_main: 0 or None = auto
            specify main chrome version (rounded, ex: 82)
        user_multi_procs: False
            stored as-is; when set, the automatic executable path is not
            re-resolved against the current directory

    Raises:
        ValueError: when version_main cannot be converted to an integer
    """
    self.force = force
    self._custom_exe_path = False
    prefix = "undetected"
    self.user_multi_procs = user_multi_procs

    # None means "not specified", which is the same as the documented 0 = auto
    if version_main is None:
        version_main = 0
    try:
        version_main = int(version_main)
    except (ValueError, TypeError) as exc:
        raise ValueError("version_main must be an integer") from exc

    self.is_old_chromedriver = bool(version_main and version_main <= 114)
    # Needs to be called before self.exe_name is accessed
    self._set_platform_name()

    if not os.path.exists(self.data_path):
        os.makedirs(self.data_path, exist_ok=True)

    self.zip_path = os.path.join(self.data_path, prefix)

    if executable_path:
        # user supplied driver binary
        if not IS_POSIX and not executable_path[-4:] == ".exe":
            executable_path += ".exe"
        self._custom_exe_path = True
        self.executable_path = executable_path
    else:
        self.executable_path = os.path.join(
            self.data_path, "_".join([prefix, self.exe_name])
        )
        if not self.user_multi_procs:
            self.executable_path = os.path.abspath(
                os.path.join(".", self.executable_path)
            )

    # Set the correct repository to download the Chromedriver from
    if self.is_old_chromedriver:
        self.url_repo = "https://chromedriver.storage.googleapis.com"
    else:
        self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"

    self.version_main = version_main
    self.version_full = None
def auto(self, executable_path=None, force=False, version_main=None, _=None):
    """
    Make sure a patched chromedriver binary is available.

    Args:
        executable_path: use this driver binary instead of downloading one
        force: when True, processes holding the binary are killed
        version_main: main chrome version to download the driver for
        _: unused

    Returns:
        True when an already patched binary is reused, None when a custom
        binary was already patched, otherwise the result of patch_exe()
        (custom binary) or patch() (freshly downloaded binary).
    """
    p = pathlib.Path(self.data_path)
    if self.user_multi_procs:
        # use the lock shared through the class; a lock created on the spot
        # is never contended and therefore protects nothing
        with self.lock:
            files = list(p.rglob("*chromedriver*"))
            # the data directory is empty before the first download
            if files:
                most_recent = max(files, key=lambda f: f.stat().st_mtime)
                files.remove(most_recent)
                # keep only the newest binary
                for stale in files:
                    stale.unlink()
                if self.is_binary_patched(most_recent):
                    self.executable_path = str(most_recent)
                    return True

    if executable_path:
        self.executable_path = executable_path
        self._custom_exe_path = True

    if self._custom_exe_path:
        if not self.is_binary_patched(self.executable_path):
            return self.patch_exe()
        return

    if version_main:
        self.version_main = version_main
    if force is True:
        self.force = force

    try:
        os.unlink(self.executable_path)
    except PermissionError:
        if self.force:
            self.force_kill_instances(self.executable_path)
            return self.auto(force=not self.force)
        try:
            if self.is_binary_patched():
                # assumes already running AND patched
                return True
        except PermissionError:
            pass
    except FileNotFoundError:
        pass

    release = self.fetch_release_number()
    self.version_main = release.version[0]
    self.version_full = release
    self.unzip_package(self.fetch_package())
    return self.patch()
def parse_exe_version(self):
    """
    Scan the driver binary at ``self.executable_path`` for its embedded
    version string.

    :return: the version found, or None when the binary has no such marker
    :rtype: LooseVersion
    """
    marker = re.compile(rb"platform_handle\x00content\x00([0-9.]*)")
    with io.open(self.executable_path, "rb") as binary:
        for chunk in binary:
            found = marker.search(chunk)
            if found:
                return LooseVersion(found.group(1).decode())
    return None
@staticmethod
def force_kill_instances(exe_name):
    """
    kills running instances.

    :param exe_name: executable name to kill, may be a path as well
    :return: True on success else False
    """
    # function-scope imports: this module does not import these at file level
    import signal
    import subprocess

    exe_name = os.path.basename(exe_name)
    if IS_POSIX:
        # look the pids up and signal them directly: "kill -f" is rejected by
        # the kill of common Linux/macOS shells, and an argument list keeps
        # the file name away from shell interpretation
        try:
            lookup = subprocess.run(
                ["pidof", exe_name],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except OSError:
            # pidof is not available
            return False
        pids = [int(token) for token in lookup.stdout.split() if token.isdigit()]
        if not pids:
            return False
        all_killed = True
        for pid in pids:
            try:
                os.kill(pid, signal.SIGKILL)
            except OSError:
                all_killed = False
        return all_killed

    try:
        result = subprocess.run(["taskkill", "/f", "/im", exe_name], check=False)
    except OSError:
        return False
    return result.returncode == 0
def __del__(self):
    """
    Delete the automatically managed driver binary.

    Nothing is deleted when the binary was specified by the user or when
    user_multi_procs is set. Deletion is retried for up to 3 seconds while
    the file cannot be removed.
    """
    if self._custom_exe_path:
        # if the driver binary is specified by user
        # we assume it is important enough to not delete it
        return
    if self.user_multi_procs:
        return

    timeout = 3  # stop trying after this many seconds
    deadline = time.monotonic() + timeout
    # the loop must run while the deadline has NOT passed; the previous
    # "elapsed > timeout" test was false from the start, so nothing was
    # ever deleted
    while time.monotonic() < deadline:
        try:
            os.unlink(self.executable_path)
        except (FileNotFoundError, AttributeError):
            # already gone, or __init__ never got far enough to set the path.
            # FileNotFoundError is handled before OSError, its base class,
            # because otherwise this branch can never be reached
            return
        except (OSError, RuntimeError):
            # probably still in use: wait a little and try again
            time.sleep(0.01)
        else:
            logger.debug("successfully unlinked %s" % self.executable_path)
            return
async def listen(self):
    """
    Poll the driver's "performance" log about once per second and dispatch
    every entry to the registered handlers, until the stop event is set.

    A handler registered as "*" receives every message; otherwise the handler
    registered for the lower-cased method name is used. Handlers run in the
    loop's default executor.

    Errors are never propagated: they are logged at debug level and polling
    continues, except for "invalid session id" errors which are dropped
    silently.
    """
    while self.running:
        await self._wait_service_started()
        await asyncio.sleep(1)

        try:
            with self.lock:
                log_entries = self.driver.get_log("performance")

            for entry in log_entries:
                obj_serialized: str = entry.get("message")
                obj = json.loads(obj_serialized)
                message = obj.get("message")
                method = message.get("method")

                if "*" in self.handlers:
                    await self.loop.run_in_executor(
                        None, self.handlers["*"], message
                    )
                elif method.lower() in self.handlers:
                    await self.loop.run_in_executor(
                        None, self.handlers[method.lower()], message
                    )

        except Exception as e:
            if "invalid session id" not in str(e):
                # the exception needs a %s placeholder: without one, logging
                # fails while formatting the record and the message is lost
                logger.debug("exception ignored: %s", e)
def children(
    self, tag=None, recursive=False
) -> List[selenium.webdriver.remote.webelement.WebElement]:
    """
    Return the child elements of this element as a list.

    :param tag: str, if supplied, only nodes with this tag name are returned
    :param recursive: when true, collect children through
        `_recursive_children` instead of the direct children only
    """
    js = "return [... arguments[0].children]"
    if tag:
        wanted = tag.upper()
        js = js + ".filter( node => node.tagName === '%s')" % wanted
    if recursive:
        found = _recursive_children(self, tag)
    else:
        found = self._parent.execute_script(js, self)
    return list(found)
@property
def attrs(self):
    """
    The element's attributes as a ``{name: value}`` dict.

    The browser is queried on first access only; the result is cached in
    ``self._attrs``.
    """
    # compare with None: an element without attributes yields an empty dict,
    # which is falsy and would be fetched again on every access
    if self._attrs is None:
        self._attrs = self._parent.execute_script(
            """
            var items = {};
            for (var index = 0; index < arguments[0].attributes.length; ++index)
            {
                items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
            };
            return items;
            """,
            self,
        )
    return self._attrs
77 | """ 78 | results = _results or set() 79 | for element in element.children(): 80 | if tag: 81 | if element.tag_name == tag: 82 | results.add(element) 83 | else: 84 | results.add(element) 85 | results |= _recursive_children(element, tag, results) 86 | return results 87 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import random 5 | import re 6 | import shutil 7 | 8 | from selenium.webdriver.chrome.webdriver import WebDriver 9 | 10 | import undetected_chromedriver as uc 11 | from dtos import V1RequestBase 12 | 13 | FLARESOLVERR_VERSION = 0.1 14 | CHROME_MAJOR_VERSION = None 15 | USER_AGENT = None 16 | XVFB_DISPLAY = None 17 | PATCHED_DRIVER_PATH = None 18 | 19 | 20 | def get_config_log_html() -> bool: 21 | return os.environ.get('LOG_HTML', 'false').lower() == 'true' 22 | 23 | 24 | def get_config_headless() -> bool: 25 | return os.environ.get('HEADLESS', 'true').lower() == 'true' 26 | 27 | 28 | def get_flaresolverr_version() -> str: 29 | global FLARESOLVERR_VERSION 30 | if FLARESOLVERR_VERSION is not None: 31 | return FLARESOLVERR_VERSION 32 | 33 | package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json') 34 | with open(package_path) as f: 35 | FLARESOLVERR_VERSION = json.loads(f.read())['version'] 36 | return FLARESOLVERR_VERSION 37 | 38 | 39 | def get_webdriver(req: V1RequestBase = None, retry: int = 0) -> WebDriver: 40 | global PATCHED_DRIVER_PATH 41 | logging.debug('Launching web browser...') 42 | 43 | try: 44 | # undetected_chromedriver 45 | options = uc.ChromeOptions() 46 | options.add_argument('--no-sandbox') 47 | 48 | random_w = random.randint(800, 1200) 49 | random_h = random.randint(600, 800) 50 | options.add_argument(f'--window-size={random_w},{random_h}') 51 | 52 | # todo: this param shows a warning in chrome head-full 53 | 
options.add_argument('--disable-setuid-sandbox') 54 | options.add_argument('--disable-dev-shm-usage') 55 | # this option removes the zygote sandbox (it seems that the resolution is a bit faster) 56 | options.add_argument('--no-zygote') 57 | 58 | # Test 59 | if req.beta_args is not None and req.beta_args is True: 60 | options.add_argument('--auto-open-devtools-for-tabs') 61 | 62 | # Proxy Support 63 | if req is not None and req.proxy is not None: 64 | proxy = req.proxy['url'] 65 | options.add_argument('--proxy-server=%s' % proxy) 66 | # print("Added proxy: %s" % proxy) 67 | 68 | if req is not None: 69 | if req.headless: 70 | options.add_argument("--headless") 71 | 72 | # note: headless mode is detected (options.headless = True) 73 | # we launch the browser in head-full mode with the window hidden 74 | windows_headless = False 75 | if get_config_headless(): 76 | if req is not None and req.headless is True or os.name == 'nt': 77 | windows_headless = True 78 | 79 | # Make headless 80 | # Add start minimized 81 | # options.add_argument('--start-minimized') 82 | else: 83 | start_xvfb_display() 84 | 85 | # If we are inside the Docker container, we avoid downloading the driver 86 | driver_exe_path = None 87 | version_main = None 88 | if os.path.exists("/app/chromedriver"): 89 | # Running inside Docker 90 | driver_exe_path = "/app/chromedriver" 91 | else: 92 | version_main = get_chrome_major_version() 93 | if PATCHED_DRIVER_PATH is not None: 94 | driver_exe_path = PATCHED_DRIVER_PATH 95 | 96 | # downloads and patches the chromedriver 97 | # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time 98 | driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main, 99 | windows_headless=windows_headless, start_error_message="") 100 | 101 | # Temporary fix for headless mode 102 | if windows_headless: 103 | # Hide the window 104 | driver.minimize_window() 105 | 106 | # save the patched driver to avoid 
re-downloads 107 | if driver_exe_path is None: 108 | PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name) 109 | shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH) 110 | 111 | # selenium vanilla 112 | # options = webdriver.ChromeOptions() 113 | # options.add_argument('--no-sandbox') 114 | # options.add_argument('--window-size=1920,1080') 115 | # options.add_argument('--disable-setuid-sandbox') 116 | # options.add_argument('--disable-dev-shm-usage') 117 | # driver = webdriver.Chrome(options=options) 118 | 119 | return driver 120 | except Exception as e: 121 | 122 | # Retry up to 3 times 123 | if retry < 3: 124 | logging.exception(e) 125 | logging.debug('Retrying...') 126 | return get_webdriver(req, retry + 1) 127 | 128 | logging.exception(e) 129 | tb = e.__traceback__ 130 | lineno = tb.tb_lineno 131 | raise Exception(f'Error launching web browser: {e} (line {lineno})') 132 | 133 | 134 | def get_chrome_exe_path() -> str: 135 | return uc.find_chrome_executable() 136 | 137 | 138 | def get_chrome_major_version() -> str: 139 | global CHROME_MAJOR_VERSION 140 | if CHROME_MAJOR_VERSION is not None: 141 | return CHROME_MAJOR_VERSION 142 | 143 | if os.name == 'nt': 144 | try: 145 | stream = os.popen( 146 | 'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"') 147 | output = stream.read() 148 | # Example: '104.0.5112.79' 149 | complete_version = extract_version_registry(output) 150 | 151 | # noinspection PyBroadException 152 | except Exception: 153 | # Example: '104.0.5112.79' 154 | complete_version = extract_version_folder() 155 | else: 156 | chrome_path = uc.find_chrome_executable() 157 | process = os.popen(f'"{chrome_path}" --version') 158 | # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n' 159 | # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n' 160 | complete_version = process.read() 161 | process.close() 162 | 163 | CHROME_MAJOR_VERSION = 
complete_version.split('.')[0].split(' ')[-1] 164 | return CHROME_MAJOR_VERSION 165 | 166 | 167 | def extract_version_registry(output) -> str: 168 | try: 169 | google_version = '' 170 | for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]: 171 | if letter != '\n': 172 | google_version += letter 173 | else: 174 | break 175 | return google_version.strip() 176 | except TypeError: 177 | return '' 178 | 179 | 180 | def extract_version_folder() -> str: 181 | # Check if the Chrome folder exists in the x32 or x64 Program Files folders. 182 | for i in range(2): 183 | path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application' 184 | if os.path.isdir(path): 185 | paths = [f.path for f in os.scandir(path) if f.is_dir()] 186 | for path in paths: 187 | filename = os.path.basename(path) 188 | pattern = '\d+\.\d+\.\d+\.\d+' 189 | match = re.search(pattern, filename) 190 | if match and match.group(): 191 | # Found a Chrome version. 192 | return match.group(0) 193 | return '' 194 | 195 | 196 | def get_user_agent(driver=None) -> str: 197 | global USER_AGENT 198 | if USER_AGENT is not None: 199 | return USER_AGENT 200 | 201 | try: 202 | if driver is None: 203 | req = V1RequestBase(_dict={}) 204 | req.headless = True 205 | driver = get_webdriver(req=req) 206 | 207 | USER_AGENT = driver.execute_script("return navigator.userAgent") 208 | return USER_AGENT 209 | except Exception as e: 210 | raise Exception("Error getting browser User-Agent. 
" + str(e)) 211 | finally: 212 | if driver is not None: 213 | driver.quit() 214 | 215 | 216 | def start_xvfb_display(): 217 | global XVFB_DISPLAY 218 | if XVFB_DISPLAY is None: 219 | from xvfbwrapper import Xvfb 220 | XVFB_DISPLAY = Xvfb() 221 | XVFB_DISPLAY.start() 222 | 223 | 224 | def object_to_dict(_object): 225 | json_dict = json.loads(json.dumps(_object, default=lambda o: o.__dict__)) 226 | # remove hidden fields 227 | return {k: v for k, v in json_dict.items() if not k.startswith('__')} 228 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | WebTest==3.0.0 2 | --------------------------------------------------------------------------------