├── docker ├── requirements.txt └── rootfs │ └── opt │ └── yandex_captcha_puzzle_solver │ ├── etc │ └── html_templates │ │ ├── form.html.j2 │ │ └── index.html.j2 │ └── bin │ ├── YandexCaptchaPuzzleSolverProxyRun.sh │ └── YandexCaptchaPuzzleSolverRun.sh ├── tests └── empty_test.py ├── src ├── yandex_captcha_puzzle_solver │ ├── __init__.py │ ├── proxy_controller.py │ ├── browser_wrapper.py │ ├── yandex_captcha_puzzle_solve_server.py │ ├── image_processor.py │ └── yandex_captcha_puzzle_solver.py ├── mitm_addons │ ├── mitm_grounding_addon.py │ └── mitm_split_addon.py └── grounding_server │ └── grounding_server.py ├── docker-compose.yml ├── pyproject.toml ├── .github └── workflows │ ├── python-publish.yml │ ├── docker-testing.yml │ ├── python-package.yml │ └── docker-publish.yml ├── setup.py ├── LICENSE ├── README.md ├── utils ├── gost-install.sh ├── linux_chrome_deb_repo_installer.sh └── linux_chrome_archive_installer.py └── Dockerfile /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | mitmproxy==10.4.2 2 | -------------------------------------------------------------------------------- /tests/empty_test.py: -------------------------------------------------------------------------------- 1 | def test_empty(): 2 | pass 3 | -------------------------------------------------------------------------------- /docker/rootfs/opt/yandex_captcha_puzzle_solver/etc/html_templates/form.html.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

{{smart_token}}

5 | 6 | 7 | -------------------------------------------------------------------------------- /src/yandex_captcha_puzzle_solver/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib.metadata 2 | 3 | from .yandex_captcha_puzzle_solver import Request, Response, Solver, BrowserWrapper 4 | from .proxy_controller import ProxyController 5 | from .yandex_captcha_puzzle_solve_server import server, server_run 6 | 7 | __version__ = importlib.metadata.version(__package__ or __name__) 8 | 9 | __all__ = [ 10 | 'Request', 'Response', 'Solver', 'BrowserWrapper', 11 | 'ProxyController', 'server', 'server_run' 12 | ] 13 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | yandex-captcha-puzzle-solver : 3 | build: 4 | args: 5 | #CHROME_VERSION: '131.' 6 | UID: 1001 7 | # CHROME_DISABLE_GPU: true 8 | # CHECK_SYSTEM: true 9 | # PYTHON_VERSION: 3.11 10 | context: . 
11 | dockerfile: Dockerfile 12 | environment: 13 | UNUSED: false 14 | # DEBUG: true 15 | VERBOSE: true 16 | container_name: yandex-captcha-puzzle-solver 17 | image: yandex-captcha-puzzle-solver:latest 18 | ports: 19 | - 20081:8080 20 | #volumes: 21 | #- ./var:/opt/yandex_captcha_puzzle_solver/var/ 22 | #< Uncomment if you want persistent logs between runs 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | 'setuptools>=45.0', 4 | 'setuptools-scm' 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "yandex_captcha_puzzle_solver" 10 | version = "0.1.2" 11 | authors = [ 12 | {name = "yoori", email = "yuri.kuznecov@gmail.com"} 13 | ] 14 | 15 | description = "" 16 | readme = "README.md" 17 | license = {text = 'GNU Lesser General Public License'} 18 | requires-python = ">=3.9" 19 | dynamic = ["dependencies"] 20 | 21 | [project.urls] 22 | homepage = "https://github.com/yoori/flare-bypasser" 23 | 24 | [options] 25 | package_dir = "src/yandex_captcha_puzzle_solver" 26 | 27 | [project.scripts] 28 | yandex_captcha_puzzle_solve_server = "yandex_captcha_puzzle_solver:server_run" -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v3 19 | with: 20 | python-version: '3.x' 21 | - name: Set up cmake and ninja 22 | uses: lukka/get-cmake@latest 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install build 27 | - 
name: Build package 28 | run: python -m build 29 | - name: Publish package 30 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 31 | with: 32 | user: __token__ 33 | password: ${{ secrets.PYPI_API_TOKEN }} 34 | -------------------------------------------------------------------------------- /docker/rootfs/opt/yandex_captcha_puzzle_solver/etc/html_templates/index.html.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | 10 | 11 |
13 |
14 | 15 | 16 |
17 | 18 | 21 |
22 | 23 | 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import importlib 4 | import distutils.core 5 | 6 | 7 | # Trick for avoid installation of non pip installed packages (apt), available by ADDITIONAL_PYTHONPATH 8 | def is_installed(pkgname): 9 | try: 10 | m = importlib.import_module(pkgname) 11 | return m is not None 12 | except Exception: 13 | pass 14 | return False 15 | 16 | 17 | if "ADDITIONAL_PYTHONPATH" in os.environ: 18 | add_path = os.environ["ADDITIONAL_PYTHONPATH"] 19 | sys.path += add_path.split(':') 20 | 21 | install_requires = [ 22 | 'asyncio', 23 | 'uuid', 24 | 'urllib3', 25 | 'websockets==14.0', 26 | 'zendriver_flare_bypasser==0.2.4', 27 | 'argparse', 28 | 'oslex', 29 | 'jinja2', 30 | 31 | # Server dependecies 32 | 'fastapi', 33 | 'uvicorn', 34 | 35 | 'xvfbwrapper==0.2.9 ; platform_system != "Windows"', 36 | 'gunicorn ; platform_system != "Windows"', 37 | ] 38 | 39 | for package_import_name, package in [('numpy', 'numpy'), ('cv2', 'opencv-python')]: 40 | if not is_installed(package_import_name): 41 | install_requires += [package] 42 | 43 | distutils.core.setup(install_requires=install_requires) 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yuri Kuznecov (yoori / yuri.kuznecov@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to 
the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /docker/rootfs/opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LOCAL_PORT="$1" 4 | GROUND_URL="$2" 5 | PROXY="$3" 6 | LOG_DIR="$4" # Log dir. 7 | 8 | clean_up() { 9 | rm -rf "$1" 10 | } 11 | 12 | if [ "$LOG_DIR" = "" ] ; then 13 | LOG_DIR=$(mktemp -d -t proxy.XXXXXX) 14 | trap "clean_up '$LOG_DIR'" EXIT 15 | fi 16 | 17 | # Grounding server should be runned on 9001 port 18 | 19 | if [ "$PROXY" != "" ] ; then 20 | gost -L=http://127.0.0.1:$((LOCAL_PORT + 2000)) -F=$PROXY & 21 | pids+=($!) 22 | else 23 | gost -L=http://127.0.0.1:$((LOCAL_PORT + 2000)) & 24 | pids+=($!) 25 | fi 26 | 27 | # GroundingProxy: proxy that convert proxy traffic to http and send it to GroundingServer. 28 | mitmdump --mode regular --listen-port "$((LOCAL_PORT + 1000))" \ 29 | -s /opt/yandex_captcha_puzzle_solver/lib/mitm_addons/mitm_grounding_addon.py \ 30 | --set ground_url=http://localhost:9001 \ 31 | >"$LOG_DIR/mitmproxy_closing.log" 2>&1 & 32 | 33 | pids+=($!) 
34 | 35 | # SplitProxy: proxy that split traffic: 36 | # Url's with solver_intercept argument to GroundingProxy 37 | # Other to external network 38 | mitmdump --listen-port "$LOCAL_PORT" --ssl-insecure \ 39 | -s /opt/yandex_captcha_puzzle_solver/lib/mitm_addons/mitm_split_addon.py \ 40 | --mode "upstream:http://localhost:$((LOCAL_PORT + 1000))" \ 41 | --set proxy=localhost:$((LOCAL_PORT + 2000)) \ 42 | >"$LOG_DIR/mitmproxy_splitter.log" 2>&1 & 43 | 44 | pids+=($!) 45 | 46 | for pid in "${pids[@]}"; do 47 | wait "${pid}" 48 | done 49 | -------------------------------------------------------------------------------- /.github/workflows/docker-testing.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | workflow_run: 10 | workflows: ["Upload Python Package"] 11 | types: 12 | - completed 13 | 14 | env: 15 | # Use docker.io for Docker Hub if empty 16 | REGISTRY: ghcr.io 17 | # github.repository as / 18 | IMAGE_NAME: ${{ github.repository }} 19 | 20 | 21 | jobs: 22 | build: 23 | strategy: 24 | matrix: 25 | include: 26 | - builder: ubuntu-latest 27 | platform: linux/amd64 28 | - builder: ubuntu-latest 29 | platform: linux/arm64 30 | - builder: ubuntu-latest 31 | platform: linux/arm/v7 32 | 33 | runs-on: ${{ matrix.builder }} 34 | 35 | permissions: 36 | contents: read 37 | packages: write 38 | # This is used to complete the identity challenge 39 | # with sigstore/fulcio when running outside of PRs. 
40 | id-token: write 41 | 42 | steps: 43 | - name: Checkout repository 44 | uses: actions/checkout@v4 45 | 46 | - name: Checkout latest docker image 47 | if: ${{ github.event_name != 'pull_request' }} 48 | env: 49 | # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable 50 | TAGS: ${{ steps.meta.outputs.tags }} 51 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 52 | # This step uses the identity token to provision an ephemeral certificate 53 | # against the sigstore community Fulcio instance. 54 | run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # YandexCaptchaPuzzleSolver 3 | 4 | YandexCaptchaPuzzleSolver is a service to bypass Yandex Captcha (Puzzle). 5 | 6 | ## How it works 7 | 8 | YandexCaptchaPuzzleSolver starts a server that can solve Yandex captchas of the puzzle type: 9 | 10 | ![Yandex puzzle captcha view](https://github.com/user-attachments/assets/ed71b6b1-5260-43dc-ba3b-40bea1826aa5) 11 | 12 | 13 | and it waits for user requests. 14 | To get a valid token for a site (the result of solving), send a request to the Docker container (see Installation): 15 | 16 | curl -XPOST 'http://localhost:20081/get_token' \ 17 | -H 'Content-Type: application/json' \ 18 | --data-raw '{"maxTimeout": 120000, "url": "SITE FOR SOLVE", "yandex_key": "YANDEX KEY"}' 19 | 20 | You can get the YANDEX KEY from the source code of the target page; it usually starts with the **ysc1_** string. 
21 | 22 | Response example: 23 | 24 | {"status":"ok","message":"Challenge solved!","startTimestamp":1733819749.824522,"endTimestamp":1733819774.119855,"solution":{"status":"ok","url":"","cookies":[{"name":"receive-cookie-deprecation","value":"1","domain":".yandex.ru","path":"/","secure":true},{"name":"session-cookie","value":"180fc3e2fb41df94e50241d9d00b084574552116189d7515109f2424d43b405a76cd9ae4255944b2d868fe358dc27d53","domain":".some.domain","path":"/","secure":false}],"user_agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36","token":"dD0xNzMzODE5NzY3O2k9MjE3LjY1LjIuMjI5O0Q9NzAzQzI4OTlFRDBFQTBFRTM1ODE3MUFBMzRFMkFDRURDQkQzQTlFMDgwMzM4QjMzRDJEODlDMTczMTEyQTk5ODZDODkyMEQxNzA4QTBFN0I4MTkxQzVCRkQ3RjRDMzExQ0E3Qjg1NkRDOEM4MDZENTFEM0JERENFODUzNzlEMTYzODY2MkM5RDg2RjIwQUEwNzc7dT0xNzMzODE5NzY3NTk4OTEyNjU3O2g9ZjI3ZWY0OWUxZmUyN2EzNWQ4OTNmM2IzYzM5YTQwNWU="}} 25 | 26 | ## Installation 27 | 28 | It is recommended to install using a Docker container because the project depends on an external browser that is 29 | already included within the image. 30 | 31 | We provide a `docker-compose.yml` configuration file. 32 | Clone this repository and execute `docker compose up -d` to start the container. 
33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/mitm_addons/mitm_grounding_addon.py: -------------------------------------------------------------------------------- 1 | import typing 2 | import traceback 3 | import urllib.parse 4 | import mitmproxy 5 | 6 | 7 | class Addon(object): 8 | _ground_url: typing.Tuple[str, int] 9 | 10 | def __init__(self, template_root = "mtproxy_templates/"): 11 | self._ground_url = None 12 | 13 | def load(self, loader): 14 | loader.add_option( 15 | name = "ground_url", 16 | typespec = typing.Optional[str], 17 | default = None, 18 | help = "Ground url", 19 | ) 20 | 21 | def configure(self, updates): 22 | try: 23 | if "ground_url" in updates: 24 | ground_url = urllib.parse.urlparse(mitmproxy.ctx.options.ground_url) 25 | if ground_url.hostname is not None and ground_url.port is not None: 26 | self._ground_url = (ground_url.hostname, ground_url.port) 27 | 28 | except Exception as e: 29 | print("configure, exception: " + str(e), flush = True) 30 | 31 | def running(self): 32 | # We change the connection strategy to lazy so that next_layer happens before we actually connect upstream. 33 | # Alternatively we could also change the server address in `server_connect`. 
34 | mitmproxy.ctx.options.connection_strategy = "lazy" 35 | mitmproxy.ctx.options.upstream_cert = False 36 | 37 | def next_layer(self, nextlayer: mitmproxy.proxy.layer.NextLayer): 38 | """ 39 | remove TLS for ground_url requests 40 | """ 41 | try: 42 | print("next_layer: " + str(nextlayer.context.server.address), flush = True) 43 | if ( 44 | nextlayer.context.server.address is not None and 45 | self._ground_url is not None 46 | ): 47 | nextlayer.context.server.address = self._ground_url 48 | nextlayer.context.client.alpn = b"" 49 | nextlayer.layer = mitmproxy.proxy.layers.ClientTLSLayer(nextlayer.context) 50 | nextlayer.layer.child_layer = mitmproxy.proxy.layers.TCPLayer(nextlayer.context) 51 | except Exception as e: 52 | print("next_layer, exception: " + str(e), flush = True) 53 | traceback.print_exc() 54 | 55 | def server_connect(self, data: mitmproxy.proxy.server_hooks.ServerConnectionHookData): 56 | # non TLS override 57 | data.server.address = self._ground_url 58 | 59 | 60 | addons = [ 61 | Addon() 62 | ] 63 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Set up cmake and ninja 28 | uses: 
lukka/get-cmake@latest 29 | # Install opencv as package 30 | #- name: Trust ubuntu GPG keys 31 | # run: | 32 | # sudo gpg --keyserver "hkps://keyserver.ubuntu.com:443" --recv-keys 40976EAF437D05B5 3B4FE6ACC0B21F32 33 | # sudo gpg --yes --output "/etc/apt/trusted.gpg.d/40976EAF437D05B5.gpg" --export "40976EAF437D05B5" 34 | # sudo gpg --yes --output "/etc/apt/trusted.gpg.d/3B4FE6ACC0B21F32.gpg" --export "3B4FE6ACC0B21F32" 35 | # sudo add-apt-repository deb http://security.ubuntu.com/ubuntu xenial-security main 36 | # sudo apt-get update 37 | # sudo apt-get install libopencv-dev python3-opencv 38 | #- uses: Dovyski/setup-opencv-action@v1.1 39 | # with: 40 | # opencv-version: '4.0.0' 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip 44 | python -m pip install flake8 pytest 45 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 46 | - name: Lint with flake8 47 | run: | 48 | # stop the build if there are Python syntax errors or undefined names 49 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --indent-size 2 --max-line-length=100 --ignore E251,W504,C901 50 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 51 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --indent-size 2 --ignore E251,W504,C901 52 | - name: Preinstall package problematic dependencies 53 | run: | 54 | python -m pip install opencv-contrib-python numpy 55 | - name: Install package 56 | run: | 57 | python -m pip install --prefer-binary . 
58 | - name: Test with pytest 59 | run: | 60 | pytest 61 | -------------------------------------------------------------------------------- /src/grounding_server/grounding_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import jinja2 4 | import flask 5 | 6 | app = flask.Flask(__name__, template_folder = "templates/") 7 | 8 | form_page_template = None 9 | page_template = None 10 | template_root = '' 11 | 12 | 13 | @app.route('/shutdown', methods=["GET", "POST"]) 14 | def request_shutdown(): 15 | try: 16 | print("Flask shutdown request got ...", flush = True) 17 | shutdown_fun = flask.request.environ.get('werkzeug.server.shutdown') 18 | shutdown_fun() 19 | print("Flask shutdown request processed ...", flush = True) 20 | return flask.Response(status = 204) 21 | except Exception: 22 | return flask.Response(status = 500) 23 | 24 | 25 | @app.route('/', defaults={'path': ''}) 26 | @app.route('/') 27 | def request_main(path): 28 | # init template 29 | # parse utm_keyword 30 | yandex_captcha_key = flask.request.args.get("yandex_captcha_key") 31 | args = {} 32 | args['yandex_captcha_key'] = yandex_captcha_key 33 | global page_template 34 | resp = page_template.render(args) 35 | return flask.Response(resp, mimetype = 'text/html') 36 | 37 | 38 | @app.route('/send_captcha') 39 | @app.route('/send_captcha/') 40 | def request_send_captcha(): 41 | # init template 42 | # parse utm_keyword 43 | smart_token = flask.request.args.get("smart-token") 44 | args = {} 45 | args['smart_token'] = smart_token 46 | global form_page_template 47 | resp = form_page_template.render(args) 48 | return flask.Response(resp, mimetype = 'text/html') 49 | 50 | 51 | def run_app(args): 52 | app.run(host = "0.0.0.0", port = args['port'], threaded = True) 53 | 54 | 55 | def start_app(): 56 | parser = argparse.ArgumentParser(description = 'grounding_server.') 57 | parser.add_argument( 58 | "-p", "--port", type = int, default = 
9200, help="Listen port") 59 | parser.add_argument( 60 | "-f", "--pidfile", "--pid-file", type = str, default = 'grounding_server.pid', help="Pid file") 61 | parser.add_argument( 62 | "-t", "--page-template", type = str, default = 'index.html.j2', help = "Template file") 63 | parser.add_argument( 64 | "--form-page-template", type = str, default = 'form.html.j2', help = "Template file") 65 | args = parser.parse_args() 66 | 67 | pid = os.getpid() 68 | with open(args.pidfile, 'wb') as f: 69 | f.write(str(pid).encode('utf-8')) 70 | f.close() 71 | 72 | global page_template 73 | page_template = jinja2.Environment(loader = jinja2.FileSystemLoader("/")).get_template(args.page_template) 74 | 75 | global form_page_template 76 | form_page_template = jinja2.Environment(loader = jinja2.FileSystemLoader("/")).get_template(args.form_page_template) 77 | 78 | run_app({'port': args.port, 'ssl': False}) 79 | 80 | 81 | if __name__ == "__main__": 82 | start_app() 83 | -------------------------------------------------------------------------------- /src/mitm_addons/mitm_split_addon.py: -------------------------------------------------------------------------------- 1 | import typing 2 | import mitmproxy 3 | from mitmproxy.script import concurrent 4 | 5 | 6 | class Addon(object): 7 | _no_condition_via = None # proxy for send external traffic (url does not subject the condition) 8 | _proxy_via = None # proxy for send internal traffic (url is subject to the condition) 9 | 10 | def __init__(self): 11 | pass 12 | 13 | def load(self, loader): 14 | loader.add_option( 15 | name="proxy", 16 | typespec=typing.Optional[str], 17 | default=None, 18 | help="proxy", 19 | ) 20 | 21 | def configure(self, updates): 22 | try: 23 | if "proxy" in updates: 24 | self._set_proxy(mitmproxy.ctx.options.proxy) 25 | except Exception as e: 26 | print("configure, exception: " + str(e), flush=True) 27 | 28 | def running(self): 29 | # We change the connection strategy to lazy so that next_layer happens before we 
actually connect upstream. 30 | # Alternatively we could also change the server address in `server_connect`. 31 | mitmproxy.ctx.options.connection_strategy = "lazy" 32 | mitmproxy.ctx.options.upstream_cert = False 33 | # fill default upstream (for url's subject to the condition) 34 | self._proxy_via = None # set via to None for non upstream modes 35 | options = mitmproxy.ctx.options 36 | if options.mode and options.mode[0].startswith("upstream:"): 37 | mode = mitmproxy.proxy.mode_specs.UpstreamMode.parse(options.mode[0]) 38 | self._proxy_via = (mode.scheme, mode.address) 39 | 40 | @concurrent 41 | def requestheaders(self, flow): 42 | # print("REQUEST URL: " + flow.request.pretty_url, flush=True) 43 | need_send_to_proxy = self._need_send_to_proxy(flow.request) 44 | 45 | # flow.server_conn.via is None: means that will be used upstream(internal proxy) 46 | if (need_send_to_proxy and ( 47 | flow.server_conn.via is None or flow.server_conn.via != self._no_condition_via) 48 | ): 49 | # switch to use external proxy 50 | flow.server_conn.state = mitmproxy.connection.ConnectionState.CLOSED 51 | flow.server_conn.via = self._no_condition_via 52 | elif (not need_send_to_proxy and ( 53 | flow.server_conn.via is not None and flow.server_conn.via == self._no_condition_via) 54 | ): 55 | # switch from use proxy to upstream 56 | flow.server_conn.state = mitmproxy.connection.ConnectionState.CLOSED 57 | flow.server_conn.via = self._proxy_via 58 | 59 | print("SEND URL: " + flow.request.pretty_url + " => " + str(flow.server_conn.via), flush=True) 60 | 61 | def _need_send_to_proxy(self, request): 62 | args = request.query # args: MultiDictView 63 | return ("solver_intercept" not in args) 64 | 65 | def _set_proxy(self, parse_proxy): 66 | proxy_spec = mitmproxy.net.server_spec.parse(parse_proxy, "http") if parse_proxy else None 67 | self._no_condition_via = proxy_spec 68 | self._via = proxy_spec 69 | 70 | 71 | addons = [ 72 | Addon() 73 | ] 74 | 
-------------------------------------------------------------------------------- /utils/gost-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check Root User 4 | 5 | # If you want to run as another user, please modify $EUID to be owned by this user 6 | if [[ "$EUID" -ne '0' ]]; then 7 | echo "$(tput setaf 1)Error: You must run this script as root!$(tput sgr0)" 8 | exit 1 9 | fi 10 | 11 | # Set the desired GitHub repository 12 | repo="go-gost/gost" 13 | base_url="https://api.github.com/repos/$repo/releases" 14 | 15 | # Function to download and install gost 16 | install_gost() { 17 | version=$1 18 | # Detect the operating system 19 | if [[ "$(uname)" == "Linux" ]]; then 20 | os="linux" 21 | elif [[ "$(uname)" == "Darwin" ]]; then 22 | os="darwin" 23 | elif [[ "$(uname)" == "MINGW"* ]]; then 24 | os="windows" 25 | else 26 | echo "Unsupported operating system." 27 | exit 1 28 | fi 29 | 30 | # Detect the CPU architecture 31 | arch=$(uname -m) 32 | case $arch in 33 | x86_64) 34 | cpu_arch="amd64" 35 | ;; 36 | armv5*) 37 | cpu_arch="armv5" 38 | ;; 39 | armv6*) 40 | cpu_arch="armv6" 41 | ;; 42 | armv7*) 43 | cpu_arch="armv7" 44 | ;; 45 | aarch64) 46 | cpu_arch="arm64" 47 | ;; 48 | i686) 49 | cpu_arch="386" 50 | ;; 51 | mips64*) 52 | cpu_arch="mips64" 53 | ;; 54 | mips*) 55 | cpu_arch="mips" 56 | ;; 57 | mipsel*) 58 | cpu_arch="mipsle" 59 | ;; 60 | *) 61 | echo "Unsupported CPU architecture." 62 | exit 1 63 | ;; 64 | esac 65 | get_download_url="$base_url/tags/$version" 66 | download_url=$(curl -s "$get_download_url" | grep -Eo "\"browser_download_url\": \".*${os}.*${cpu_arch}.*\"" | awk -F'["]' '{print $4}' | head -n1) 67 | 68 | # Download the binary 69 | echo "Downloading gost version $version... (by $download_url)" 70 | curl -fsSL -o gost.tar.gz "$download_url" 71 | 72 | # Extract and install the binary 73 | echo "Installing gost..." 
74 | tar -xzf gost.tar.gz 75 | chmod +x gost 76 | mv gost /usr/local/bin/gost || ( echo "gost not found after install" >&2 ; exit 1 ; ) 77 | 78 | echo "gost installation completed!" 79 | } 80 | 81 | # Retrieve available versions from GitHub API 82 | versions=$(curl -s "$base_url" | grep -oP 'tag_name": "\K[^"]+') 83 | 84 | # Check if --install option provided 85 | if [[ "$1" == "--install" ]]; then 86 | # Install the latest version automatically 87 | latest_version=$(echo "$versions" | head -n 1) 88 | install_gost $latest_version 89 | else 90 | # Display available versions to the user 91 | echo "Available gost versions:" 92 | select version in $versions; do 93 | if [[ -n $version ]]; then 94 | install_gost $version 95 | break 96 | else 97 | echo "Invalid choice! Please select a valid option." 98 | fi 99 | done 100 | fi 101 | -------------------------------------------------------------------------------- /utils/linux_chrome_deb_repo_installer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # should be runned under root 4 | 5 | INSTALL_ROOT="$1" 6 | CHROME_VERSION="$2" 7 | 8 | mkdir -p "$INSTALL_ROOT" 9 | 10 | curl "https://dl.google.com/linux/linux_signing_key.pub" 2>/dev/null | tee /etc/apt/trusted.gpg.d/google.asc >/dev/null 11 | 12 | touch /etc/apt/sources.list.d/chrome-find-repos.list 13 | echo 'deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main' >>/etc/apt/sources.list.d/chrome-find-repos.list 14 | 15 | apt update -y --no-install-recommends >/dev/null 2>&1 16 | mkdir -p "$INSTALL_ROOT" 17 | 18 | apt list --all-versions 2>/dev/null | grep -E '^(google-chrome-|chromium/)' | \ 19 | tr '\t' ' ' >/tmp/available_all_chrome_versions 20 | 21 | ARCH_SYNONYMS="$(arch)" 22 | if [ "$ARCH_SYNONYMS" = "aarch64" -o "$ARCH_SYNONYMS" = "arm64" ] ; then 23 | ARCH_SYNONYMS="aarch64|arm64" 24 | elif [ "$ARCH_SYNONYMS" = "armv7l" -o "$ARCH_SYNONYMS" = "armhf" ] ; then 25 | ARCH_SYNONYMS="armv7l|armhf" 
26 | fi 27 | 28 | cat /tmp/available_all_chrome_versions | awk -F' ' '{if($3 ~ /^'"$ARCH_SYNONYMS"'$/){print $0}}' \ 29 | >/tmp/available_platform_chrome_versions 30 | 31 | FOUND_VERSION=$(cat /tmp/available_platform_chrome_versions | 32 | awk '{ if ($2 ~ /^'"$(echo "$CHROME_VERSION" | sed 's/[.]/\\\./')"'/) {print $1" "$2} }' | 33 | sed -r 's|(^[^ ]+)/[^ ]+ (.*)$|\1 \2|' | head -n1 | tr ' ' '=') 34 | 35 | if [ "$FOUND_VERSION" = "" ] ; then 36 | echo "Can't find chrome of required version: $CHROME_VERSION , all available versions (for all platforms):" >&2 37 | cat /tmp/available_all_chrome_versions >&2 38 | echo "Version available for your platform ($(arch)):" >&2 39 | cat /tmp/available_platform_chrome_versions >&2 40 | exit 1 41 | fi 42 | 43 | echo "To install package: $FOUND_VERSION" 44 | 45 | apt remove -y "$(echo "$FOUND_VERSION" | awk -F= '{print $1}')" >/dev/null 2>&1 46 | 47 | rm -rf /tmp/chrome_download >/dev/null 48 | mkdir /tmp/chrome_download 49 | pushd /tmp/chrome_download >/dev/null 2>&1 50 | 51 | apt download "$FOUND_VERSION" >/tmp/chrome_install.err 2>&1 || ( 52 | echo "Chrome install failed:" >&2 ; cat /tmp/chrome_install.err >&2 ; 53 | echo "Available versions: " >&2 ; cat /tmp/available_platform_chrome_versions >&2 ; 54 | exit 1 ; 55 | ) || exit 1 56 | 57 | CHROME_DEPS=$(find . 
-type f -exec apt-cache depends {} \; | \ 58 | sed -r 's/^<(.*)>$/\1/' | sort -u | grep -E '^chromium-common') 59 | 60 | if [ "$CHROME_DEPS" != "" ] ; then 61 | DEP_VERSION=$(echo "$FOUND_VERSION" | awk -F'=' '{print $2}') 62 | if [ "$DEP_VERSION" != "" ] ; then 63 | DEP_VERSION="=$DEP_VERSION" 64 | fi 65 | INSTALL_CHROME_DEPS=$(echo "$CHROME_DEPS" | tr ' ' '\n' | grep -v -E '^$' | sed -r 's/$/'"$DEP_VERSION"'/' | tr '\n' ' ') 66 | echo "To install package deps: $INSTALL_CHROME_DEPS" 67 | apt download $INSTALL_CHROME_DEPS >>/tmp/chrome_install.err 2>&1 || ( 68 | echo "Chrome deps install failed:" >&2 ; cat /tmp/chrome_install.err >&2 ; 69 | echo "Available versions: " >&2 ; cat /tmp/available_platform_chrome_versions >&2 ; 70 | exit 1 ; 71 | ) || exit 1 72 | fi 73 | 74 | find . -type f -exec dpkg-deb -R {} "$INSTALL_ROOT" \; 75 | 76 | popd 77 | -------------------------------------------------------------------------------- /utils/linux_chrome_archive_installer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import shutil 4 | import logging 5 | import json 6 | import zipfile 7 | import argparse 8 | from urllib.request import urlretrieve, urlopen 9 | 10 | 11 | def fetch_package(download_url): 12 | return urlretrieve(download_url)[0] 13 | 14 | 15 | def unzip_package( 16 | fp, extract_root='/', unzip_path='/tmp/unzip_chrome', 17 | extract_sub_directory='' 18 | ): 19 | try: 20 | os.unlink(unzip_path) 21 | except (FileNotFoundError, OSError): 22 | pass 23 | 24 | os.makedirs(unzip_path, mode=0o755, exist_ok=True) 25 | 26 | with zipfile.ZipFile(fp, mode="r") as zf: 27 | zf.extractall(unzip_path) 28 | 29 | shutil.copytree( 30 | os.path.join(unzip_path, extract_sub_directory), extract_root, 31 | dirs_exist_ok=True) 32 | shutil.rmtree(unzip_path) 33 | 34 | 35 | def download_and_install(version_prefix = None, install_root = None, arch = 'x86_64'): 36 | # Script can install chrome only on linux platforms and 
only on x86_64. 37 | # here no archive of versions for linux/arm64 38 | if arch == 'x86_64': 39 | target_platform = "linux64" 40 | else: 41 | raise Exception("Unknown or unsupported platform: " + str(arch)) 42 | 43 | chrome_download_url = None 44 | with urlopen( 45 | "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json" 46 | ) as conn: 47 | response = conn.read().decode() 48 | response_json = json.loads(response) 49 | 50 | # If version is undefined: use max_version 51 | if version_prefix == '': 52 | version_prefix = None 53 | 54 | for version_obj in response_json['versions']: 55 | if ('version' in version_obj and 'downloads' in version_obj and ( 56 | version_prefix is None or version_obj['version'].startswith(version_prefix))): 57 | downloads_obj = version_obj['downloads'] 58 | if ('chrome' in downloads_obj): 59 | local_chrome_download_url = None 60 | 61 | for platform_obj in downloads_obj['chrome']: 62 | if platform_obj['platform'] == target_platform: 63 | local_chrome_download_url = platform_obj['url'] 64 | 65 | if local_chrome_download_url is not None: 66 | chrome_download_url = local_chrome_download_url 67 | if version_prefix is not None: 68 | break 69 | 70 | if chrome_download_url is None: 71 | raise Exception("Can't find download urls") 72 | 73 | print("Download chrome by url: " + str(chrome_download_url), flush=True) 74 | extract_root = install_root if install_root is not None else '/usr/bin/' 75 | unzip_package( 76 | fetch_package(chrome_download_url), extract_root=extract_root, 77 | extract_sub_directory=('chrome-' + target_platform)) 78 | 79 | os.chmod(os.path.join(extract_root, 'chrome'), 0o755) 80 | os.chmod(os.path.join(extract_root, 'chrome-wrapper'), 0o755) 81 | os.chmod(os.path.join(extract_root, 'chrome_crashpad_handler'), 0o755) 82 | os.chmod(os.path.join(extract_root, 'chrome_sandbox'), 0o755) 83 | 84 | os.system( 85 | "sed -i 's/Google Chrome for Testing/Google Chrome\\x00for Testing/' " + 86 | 
class ProxyController(object):
    """Run and share local forwarding proxies, one per upstream proxy URL.

    Every distinct upstream URL gets one proxy process, started from the
    ``command`` template and listening on a local port taken from the
    inclusive interval [start_port .. end_port]. Processes are reference
    counted through ``ProxyHolderRef`` objects returned by ``get_proxy`` and
    are killed when the last reference is released.
    """

    # String annotation: only needed by type checkers, not at class creation.
    _proxy_cmd_template: "jinja2.Template"
    _lock: threading.Lock
    _proxies_by_url: typing.Dict[str, object]  # -> ProxyHolder
    _proxies_by_port: typing.Dict[int, object]  # -> ProxyHolder

    class PortBusy(Exception):
        """The port selected for a new proxy is already listened by someone."""

    class NoPortForListen(Exception):
        """All ports of the configured interval are used by running proxies."""

    class ProxyHolder(object):
        """One proxy process plus the number of references to it."""

        _proxy_storage: object  # ProxyController
        _local_port: int
        _url: str
        _ref_count: int = 0
        _start_wait: threading.Lock
        _started: bool = False
        _process = None

        def __init__(self, proxy_storage: object, local_port: int, url: str):
            self._proxy_storage = proxy_storage
            self._start_wait = threading.Lock()
            self._local_port = local_port
            self._url = url

        def add_ref(self):
            """Take one reference, starting the proxy process on first use.

            Concurrent callers wait on ``_start_wait`` while a start is in
            progress. If the start raises, no reference is taken and the
            holder stays "not started", so the next call retries the start.
            """
            with self._start_wait:
                if not self._started:
                    self._proxy_storage._start_proxy(self)
                    self._started = True
                self._ref_count += 1

        def remove_ref(self):
            """Drop one reference; close the proxy when none is left."""
            self._ref_count -= 1
            if self._ref_count == 0:
                self._proxy_storage._close_proxy(self)

    class ProxyHolderRef(object):
        """Owning handle of one ``ProxyHolder`` reference (context manager)."""

        # Class level default keeps release()/__del__ safe on a half-built object.
        _proxy_holder: object = None  # ProxyController.ProxyHolder

        def __init__(self, proxy_holder: object):
            # Remember the holder only after the reference is really taken:
            # if add_ref() raises (proxy start failed), __del__ -> release()
            # must not call remove_ref() for a reference that never existed,
            # otherwise the counter goes negative and the proxy is never closed.
            self._proxy_holder = None
            proxy_holder.add_ref()
            self._proxy_holder = proxy_holder

        def local_port(self):
            """Return the local port the proxy listens on."""
            return self._proxy_holder._local_port

        def url(self):
            """Return the upstream URL the proxy forwards to."""
            return self._proxy_holder._url

        def is_alive(self):
            """Return True while the holder still has a process object."""
            return self._proxy_holder._process is not None

        def release(self):
            """Give the reference back; safe to call more than once."""
            if self._proxy_holder:
                self._proxy_holder.remove_ref()
                self._proxy_holder = None

        def __enter__(self):
            return self

        def __exit__(self, type, value, traceback):
            self.release()
            return False

        def __del__(self):
            self.release()

    def __init__(
        self,
        start_port=10000,
        end_port=20000,
        command="gost -L=socks5://127.0.0.1:{{LOCAL_PORT}} -F='{{UPSTREAM_URL}}'"
    ):
        """Configure the controller; no process is started here.

        Args:
            start_port: First port of the interval used by locally started proxies.
            end_port: Last port (inclusive) of that interval.
            command: Jinja2 template of the proxy command line; rendered with
                ``LOCAL_PORT`` and ``UPSTREAM_URL``.
        """
        self._proxy_cmd_template = jinja2.Environment().from_string(command)
        self._lock = threading.Lock()
        self._proxies_by_url = {}
        self._proxies_by_port = {}
        self._start_port = start_port
        self._end_port = end_port

    def get_proxy(self, url):
        """Return a ``ProxyHolderRef`` for *url*, starting a proxy if needed.

        Raises:
            ProxyController.PortBusy: the chosen port is listened by a foreign process.
            ProxyController.NoPortForListen: every port of the interval is in use.
        """
        with self._lock:
            if url in self._proxies_by_url:
                return ProxyController.ProxyHolderRef(self._proxies_by_url[url])
            port = self._choose_port(url)
            holder = ProxyController.ProxyHolder(self, port, url)
            self._proxies_by_url[url] = holder
            self._proxies_by_port[port] = holder

        # Start / wait for start / simply increase the reference counter.
        # NOTE: a holder whose start failed stays registered; the next
        # get_proxy() call for the same URL retries the start.
        return ProxyController.ProxyHolderRef(holder)

    def opened_proxies_count(self):
        """Return the number of registered proxies."""
        return len(self._proxies_by_port)

    @staticmethod
    def _port_is_listen(port):
        """Return True if something accepts TCP connections on 127.0.0.1:*port*."""
        with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            try:
                return sock.connect_ex(("127.0.0.1", port)) == 0
            except socket.gaierror:
                return False

    def _choose_port(self, url):
        """Pick a free port for *url*; must be called with ``_lock`` held.

        The scan starts at an offset derived from ``hash(url)`` and skips
        ports already assigned to proxies of this controller.
        """
        port_count = self._end_port - self._start_port + 1
        base_port_offset = hash(url) % port_count
        for port_offset in range(port_count):
            check_port = self._start_port + (base_port_offset + port_offset) % port_count
            if check_port in self._proxies_by_port:
                continue
            if ProxyController._port_is_listen(check_port):
                raise ProxyController.PortBusy(
                    "Port " + str(check_port) + " dedicated for proxy usage is busy.")
            return check_port
        raise ProxyController.NoPortForListen()

    def _start_proxy(self, proxy_holder):
        """Render the command template and spawn the proxy process."""
        proxy_cmd = self._proxy_cmd_template.render({
            'LOCAL_PORT': str(proxy_holder._local_port),
            'UPSTREAM_URL': proxy_holder._url})
        logger.info("Start with: " + str(proxy_cmd))
        proxy_holder._process = subprocess.Popen(
            oslex.split(proxy_cmd), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def _close_proxy(self, proxy_holder):
        """Unregister *proxy_holder* and kill its process."""
        with self._lock:
            # pop() instead of del: a repeated close must not raise KeyError.
            self._proxies_by_url.pop(proxy_holder._url, None)
            self._proxies_by_port.pop(proxy_holder._local_port, None)
            # Kill under the lock, so the port is really free before
            # _choose_port() can hand it out again.
            if proxy_holder._process:
                logger.info("Close proxy for: " + str(proxy_holder._url))
                proxy_holder._process.kill()
                proxy_holder._process.wait()
                proxy_holder._process = None
--timeout=60 \ 46 | --window-size=1920,1200 \ 47 | --headless \ 48 | --screenshot="$SCREENSHOT_FILE" \ 49 | "https://www.google.com" \ 50 | >"$CHROME_OUTPUT_FILE" 2>&1 & 51 | CHROME_PID=$! 52 | 53 | START_TIME=$(date +%s) 54 | WAIT_TIMEOUT=30 55 | EXIT_CODE=1 56 | 57 | while true 58 | do 59 | CUR_TIME=$(date +%s) 60 | if [[ $((CUR_TIME - START_TIME)) -gt "$WAIT_TIMEOUT" ]]; then 61 | break 62 | fi 63 | if ! ps -p "$CHROME_PID" > /dev/null; then 64 | break 65 | fi 66 | if [ -f "$SCREENSHOT_FILE" ]; then 67 | EXIT_CODE=0 68 | break 69 | fi 70 | sleep 1 71 | done 72 | 73 | if [ -f "$SCREENSHOT_FILE" ]; then 74 | EXIT_CODE=0 75 | fi 76 | 77 | if [[ $EXIT_CODE == 0 ]] 78 | then 79 | echo "Chrome diagnostic success" 80 | else 81 | echo "Chrome diagnostic failed (chrome running)" >&2 82 | cat "$CHROME_OUTPUT_FILE" >&2 83 | fi 84 | 85 | kill "$CHROME_PID" 2>/dev/null 86 | wait "$CHROME_PID" 87 | 88 | kill "$XVFB_PID" 2>/dev/null 89 | wait "$XVFB_PID" 90 | 91 | return $EXIT_CODE 92 | } 93 | 94 | set -o pipefail 95 | 96 | CURRENT_UID=$(id -u) 97 | CURRENT_GID=$(id -g) 98 | 99 | export IN_DOCKER=true 100 | export WORKSPACE_ROOT=/opt/yandex_captcha_puzzle_solver/var/ 101 | export PYTHONPATH=$PYTHONPATH:/opt/yandex_captcha_puzzle_solver/lib/ 102 | CHROME_BIN=$(which chrome || which chromium) 103 | 104 | if [ "$CHROME_BIN" = "" ] ; then 105 | echo "Can't find chrome executable" >&2 106 | exit 1 107 | fi 108 | 109 | sudo -n find "$WORKSPACE_ROOT" -exec chown "$CURRENT_UID:$CURRENT_GID" {} \; 110 | mkdir -p "$WORKSPACE_ROOT/log" 111 | 112 | # Non critical - simple make chrome happy and disable some its errors. 
113 | # Start dbus to suppress these chrome errors: 114 | # Failed to connect to the bus: Failed to connect to socket /run/dbus/system_bus_socket: No such file or directory 115 | # Failed to connect to the bus: Could not parse server address: Unknown address type 116 | XDG_RUNTIME_DIR=/run/xdg/ 117 | sudo bash -c " 118 | sudo service dbus start 119 | mkdir -p '$XDG_RUNTIME_DIR' 120 | chmod 700 '$XDG_RUNTIME_DIR' 121 | chown '$(id -un):$(id -gn)' '$XDG_RUNTIME_DIR'" 122 | DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus" 123 | dbus-daemon --session --address="$DBUS_SESSION_BUS_ADDRESS" --nofork --nopidfile --syslog-only & 124 | 125 | # Run the diagnostic if required 126 | if [ "$CHECK_SYSTEM" = true ] ; then 127 | chrome_diagnostic || exit 1 128 | fi 129 | 130 | # Start the grounding server - a web server that fills in the fake captcha form. 131 | python3 /opt/yandex_captcha_puzzle_solver/bin/grounding_server/grounding_server.py \ 132 | --port=9001 \ 133 | --page-template=/opt/yandex_captcha_puzzle_solver/etc/html_templates/index.html.j2 \ 134 | --form-page-template=/opt/yandex_captcha_puzzle_solver/etc/html_templates/form.html.j2 \ 135 | >"$WORKSPACE_ROOT/log/grounding_server.log" 2>&1 & 136 | 137 | # Bring up the default proxy, which is used to solve requests that don't specify a proxy. 
138 | bash /opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh \ 139 | 10000 "http://localhost:9001" "" "$WORKSPACE_ROOT/log/" \ 140 | >"$WORKSPACE_ROOT/log/yandex_proxy_run.out" 2>&1 & 141 | 142 | # Run service 143 | ADD_PARAMS="" 144 | if [ "$CHROME_DISABLE_GPU" = true ] ; then 145 | ADD_PARAMS="$ADD_PARAMS --disable-gpu" 146 | fi 147 | 148 | if [ "$VERBOSE" = true ] ; then 149 | ADD_PARAMS="$ADD_PARAMS --verbose" 150 | fi 151 | 152 | if [ "$DEBUG" = true ] ; then 153 | mkdir -p "$WORKSPACE_ROOT/debug" 154 | ADD_PARAMS="$ADD_PARAMS --debug-dir=$WORKSPACE_ROOT/debug" 155 | fi 156 | 157 | echo "Run server $(pip show yandex-captcha-puzzle-solver | grep Version | awk '{print $2}' 158 | ), chrome: $("$CHROME_BIN" --version)" 159 | 160 | yandex_captcha_puzzle_solve_server \ 161 | -b 0.0.0.0:8080 \ 162 | --proxy http://127.0.0.1:10000 \ 163 | --proxy-listen-start-port 10001 \ 164 | --proxy-listen-end-port 20000 \ 165 | --proxy-command 'bash /opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh {{LOCAL_PORT}} "http://localhost:9001" "{{UPSTREAM_URL}}"' \ 166 | $ADD_PARAMS \ 167 | 2>&1 | \ 168 | tee "$WORKSPACE_ROOT/log/yandex_captcha_puzzle_solver.log" 169 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VERSION=3.11 2 | 3 | FROM python:${PYTHON_VERSION}-slim-bookworm AS builder 4 | 5 | ARG CHROME_VERSION="" 6 | 7 | WORKDIR /app/ 8 | 9 | ENV PACKAGES_DIR=/packages 10 | 11 | # Build dummy packages to skip installing them and their dependencies 12 | RUN mkdir -p "${PACKAGES_DIR}" \ 13 | && apt-get update \ 14 | && apt-get install -y --no-install-recommends equivs \ 15 | && equivs-control libgl1-mesa-dri \ 16 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: libgl1-mesa-dri\nVersion: 99.0.0\nDescription: Dummy package for libgl1-mesa-dri\n' >> 
libgl1-mesa-dri \ 17 | && equivs-build libgl1-mesa-dri \ 18 | && mv libgl1-mesa-dri_*.deb ${PACKAGES_DIR}/libgl1-mesa-dri.deb \ 19 | && equivs-control adwaita-icon-theme \ 20 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: adwaita-icon-theme\nVersion: 99.0.0\nDescription: Dummy package for adwaita-icon-theme\n' >> adwaita-icon-theme \ 21 | && equivs-build adwaita-icon-theme \ 22 | && mv adwaita-icon-theme_*.deb ${PACKAGES_DIR}/adwaita-icon-theme.deb 23 | 24 | # Install gost proxy (to process requests through proxies that require authorization) 25 | RUN apt-get install -y --no-install-recommends curl # gost-install.sh requirement 26 | COPY utils/gost-install.sh ./gost-install.sh 27 | RUN chmod +x ./gost-install.sh && bash -c "./gost-install.sh --install" 28 | 29 | COPY utils/linux_chrome_archive_installer.py ./linux_chrome_archive_installer.py 30 | COPY utils/linux_chrome_deb_repo_installer.sh ./linux_chrome_deb_repo_installer.sh 31 | 32 | # If CHROME_VERSION isn't defined explicitly, use the version tested for the platform. 33 | RUN if [ "$CHROME_VERSION" = "" ] ; then \ 34 | BUILD_ARCH="$(arch)" ; \ 35 | if [ "$BUILD_ARCH" = "arm64" ] ; then echo 'CHROME_VERSION="120."' >>/tmp/build.env ; \ 36 | elif [ "$BUILD_ARCH" = "aarch64" -o "$BUILD_ARCH" = "armv7l" ] ; then echo 'CHROME_VERSION="130."' >>/tmp/build.env ; \ 37 | else echo 'CHROME_VERSION="131."' >>/tmp/build.env ; \ 38 | fi ; \ 39 | else echo 'CHROME_VERSION="'"$CHROME_VERSION"'"' >>/tmp/build.env ; \ 40 | fi 41 | 42 | # We prefer the version from the archive because it performs better (faster start), 43 | # but no archive versions are available for ARM 44 | RUN . 
/tmp/build.env ; if [ "$(arch)" != "x86_64" ] ; then \ 45 | echo "To install chrome($CHROME_VERSION) from google repository (no archive versions for ARM)" ; \ 46 | chmod +x ./linux_chrome_deb_repo_installer.sh ; \ 47 | bash -c "./linux_chrome_deb_repo_installer.sh /opt/yandex_captcha_puzzle_solver/installed_chrome/ '$CHROME_VERSION'" || \ 48 | { echo "Can't install chrome (required version '$CHROME_VERSION')" >&2 ; exit 1 ; } ; \ 49 | else \ 50 | echo "To install chrome($CHROME_VERSION) from archive" ; \ 51 | mkdir -p /opt/yandex_captcha_puzzle_solver/installed_chrome/usr/bin/ ; \ 52 | python3 ./linux_chrome_archive_installer.py \ 53 | --version-prefix="$CHROME_VERSION" \ 54 | --install-root=/opt/yandex_captcha_puzzle_solver/installed_chrome/usr/bin/ \ 55 | --arch=$(arch) || \ 56 | { echo "Can't install chrome (required version '$CHROME_VERSION')" >&2 ; exit 1 ; } ; \ 57 | fi 58 | 59 | 60 | FROM python:${PYTHON_VERSION}-slim-bookworm 61 | 62 | ARG UID=1111 63 | ARG GID=0 64 | ARG UNAME=yandex_captcha_puzzle_solver 65 | ARG CHECK_SYSTEM=false 66 | ARG CHROME_DISABLE_GPU=false 67 | 68 | ENV PACKAGES_DIR=/packages 69 | ENV CHECK_SYSTEM=${CHECK_SYSTEM} 70 | ENV CHROME_DISABLE_GPU=${CHROME_DISABLE_GPU} 71 | ENV DEBUG=false 72 | ENV VERBOSE=false 73 | ENV PYTHONPATH=/usr/lib/python3/dist-packages/ 74 | 75 | # Copy dummy packages 76 | COPY --from=builder ${PACKAGES_DIR} ${PACKAGES_DIR} 77 | COPY --from=builder /usr/local/bin/gost /usr/local/bin/gost 78 | 79 | # Copy installed chrome 80 | COPY --from=builder /opt/yandex_captcha_puzzle_solver/installed_chrome / 81 | 82 | # Install dependencies and create user 83 | # You can test Chromium running this command inside the container: 84 | # xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox 85 | # The error traces is like this: "*** stack smashing detected ***: terminated" 86 | # To check the package versions available you can use this command: 87 | # apt-cache madison chromium 88 | 89 | # Install dummy packages 90 | 
RUN dpkg -i ${PACKAGES_DIR}/*.deb \ 91 | # Install dependencies 92 | && apt-get update \ 93 | && apt-get install -y --no-install-recommends \ 94 | $(apt-cache depends chromium | grep Depends | sed "s/.*ends:\ //" | grep -v -E '^<.*>$' | tr '\n' ' ') \ 95 | && apt-get install -y --no-install-recommends \ 96 | xvfb dumb-init procps curl vim xauth sudo git \ 97 | # Remove temporary files and hardware decoding libraries 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && find /usr/lib/ -type f -name 'libmfxhw*' -delete \ 100 | && find /usr/lib/ -type d -name mfx -exec rm -rf {} \; \ 101 | && mkdir -p /app/bin/ 102 | 103 | RUN mkdir -p "/app/.config/chromium/Crash Reports/pending" 104 | 105 | RUN if [ "$UID" -ne 0 ] ; then echo '%sudo ALL=(ALL:ALL) NOPASSWD:ALL' >/etc/sudoers.d/nopasswd \ 106 | && adduser --disabled-password --gecos '' --uid "${UID}" --gid "${GID}" --shell /bin/bash ${UNAME} \ 107 | && adduser ${UNAME} sudo \ 108 | && chown -R ${UNAME} /app/ \ 109 | && mkdir -p /opt/yandex_captcha_puzzle_solver/var/ \ 110 | && chown -R ${UNAME} /opt/yandex_captcha_puzzle_solver/var/ ; \ 111 | fi 112 | 113 | WORKDIR /app 114 | 115 | RUN apt-get update && \ 116 | apt install -y --no-install-recommends python3-opencv python3-numpy python3-cffi 117 | 118 | COPY . 
yandex_captcha_puzzle_solver 119 | RUN ADDITIONAL_PYTHONPATH="$PYTHONPATH" pip install --prefer-binary yandex_captcha_puzzle_solver/ 120 | 121 | COPY src/grounding_server /opt/yandex_captcha_puzzle_solver/bin/grounding_server 122 | COPY src/mitm_addons /opt/yandex_captcha_puzzle_solver/lib/mitm_addons 123 | COPY docker/rootfs / 124 | COPY docker/requirements.txt /app/ 125 | RUN pip install -r /app/requirements.txt 126 | 127 | USER ${UID} 128 | 129 | # dumb-init avoids zombie chromium processes 130 | ENTRYPOINT ["/usr/bin/dumb-init", "--"] 131 | CMD ["/bin/bash", "-c", "/opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverRun.sh"] 132 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | push: 10 | branches: [ "main" ] 11 | # Publish semver tags as releases. 
12 | tags: [ 'v*.*.*' ] 13 | workflow_dispatch: 14 | 15 | env: 16 | # Use docker.io for Docker Hub if empty 17 | REGISTRY: ghcr.io 18 | # github.repository as / 19 | IMAGE_NAME: ${{ github.repository }} 20 | 21 | 22 | jobs: 23 | build: 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | include: 28 | - builder: ubuntu-latest 29 | platform: linux/x86_64 30 | - builder: ubuntu-latest 31 | platform: linux/amd64 32 | - builder: ubuntu-latest 33 | platform: linux/arm64 34 | - builder: ubuntu-latest 35 | platform: linux/aarch64 36 | - builder: ubuntu-latest 37 | platform: linux/arm/v7 38 | - builder: ubuntu-latest 39 | platform: linux/arm/v8 40 | #- builder: ubuntu-latest # No chrome 41 | # platform: linux/arm/v6 42 | #- builder: ubuntu-latest # No gost, no chrome 43 | # platform: linux/s390x 44 | #- builder: ubuntu-latest # No gost, custom chrome 45 | # platform: linux/ppc64le 46 | 47 | runs-on: ${{ matrix.builder }} 48 | 49 | permissions: 50 | contents: read 51 | packages: write 52 | id-token: write 53 | 54 | steps: 55 | - name: Prepare 56 | run: | 57 | platform=${{ matrix.platform }} 58 | echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV 59 | 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | 63 | # Install the cosign tool except on PR 64 | # https://github.com/sigstore/cosign-installer 65 | - name: Install cosign 66 | uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0 67 | with: 68 | cosign-release: 'v2.2.4' 69 | 70 | - name: Set up QEMU 71 | uses: docker/setup-qemu-action@v3 72 | 73 | # Set up BuildKit Docker container builder to be able to build 74 | # multi-platform images and export cache 75 | # https://github.com/docker/setup-buildx-action 76 | - name: Set up Docker Buildx 77 | uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 78 | 79 | # Login to docker hub for push only digest packages 80 | - name: Login to dockerhub 81 | uses: docker/login-action@v3 82 | with: 83 | 
registry: ${{ env.REGISTRY }} 84 | username: ${{ github.actor }} 85 | password: ${{ secrets.GITHUB_TOKEN }} 86 | 87 | # Extract metadata (tags, labels) for Docker 88 | # https://github.com/docker/metadata-action 89 | - name: Extract Docker metadata 90 | id: meta 91 | uses: docker/metadata-action@v5 92 | with: 93 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 94 | 95 | - name: Build and push by digest 96 | id: build 97 | uses: docker/build-push-action@v6 98 | with: 99 | context: . 100 | platforms: ${{ matrix.platform }} 101 | labels: ${{ steps.meta.outputs.labels }} 102 | outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true 103 | 104 | - name: Export digest 105 | run: | 106 | mkdir -p /tmp/digests 107 | digest="${{ steps.build.outputs.digest }}" 108 | touch "/tmp/digests/${digest#sha256:}" 109 | 110 | - name: Upload digest 111 | uses: actions/upload-artifact@v4 112 | with: 113 | name: digests-${{ env.PLATFORM_PAIR }} 114 | path: /tmp/digests/* 115 | if-no-files-found: error 116 | retention-days: 1 117 | 118 | # merge images to one multi-platform image 119 | merge: 120 | needs: 121 | - build 122 | 123 | runs-on: ubuntu-latest 124 | 125 | permissions: 126 | contents: read 127 | packages: write 128 | id-token: write 129 | 130 | steps: 131 | - name: Download digests 132 | uses: actions/download-artifact@v4 133 | with: 134 | path: /tmp/digests 135 | pattern: digests-* 136 | merge-multiple: true 137 | 138 | - name: Set up Docker Buildx 139 | uses: docker/setup-buildx-action@v3 140 | 141 | - name: Docker meta 142 | id: meta 143 | uses: docker/metadata-action@v5 144 | with: 145 | images: ${{ env.IMAGE_NAME }} 146 | tags: | 147 | type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} 148 | type=ref,enable=true,priority=600,prefix=,suffix=,event=tag 149 | type=ref,enable=true,priority=600,prefix=,suffix=,event=branch 150 | labels: | 151 | 
org.opencontainers.image.title=yandex-captcha-puzzle-solver 152 | org.opencontainers.image.description=Yandex Captcha Puzzle Solve Server 153 | org.opencontainers.image.vendor=yuri.kuznecov@gmail.com 154 | 155 | - name: Login to dockerhub 156 | uses: docker/login-action@v3 157 | with: 158 | registry: ${{ env.REGISTRY }} 159 | username: ${{ github.actor }} 160 | password: ${{ secrets.GITHUB_TOKEN }} 161 | 162 | - name: Create manifest list and push 163 | working-directory: /tmp/digests 164 | run: | 165 | docker buildx imagetools create $(jq -cr '.tags | map("-t ${{ env.REGISTRY }}/" + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ 166 | $(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *) 167 | 168 | - name: Inspect image 169 | run: | 170 | set -e 171 | set -o pipefail 172 | echo "${{ steps.meta.outputs.tags }}" | sed -r 's/[ ]+/ /g' | tr ' ' '\n' | sed -r 's|^|${{ env.REGISTRY }}/|' | \ 173 | while read IMAGE_NAME ; do \ 174 | docker buildx imagetools inspect "$IMAGE_NAME" || exit 1 ; \ 175 | done 176 | 177 | # TODO: publish to docker.io 178 | -------------------------------------------------------------------------------- /src/yandex_captcha_puzzle_solver/browser_wrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import typing 4 | import traceback 5 | import asyncio 6 | import uuid 7 | import http.cookiejar 8 | import shutil 9 | import logging 10 | import numpy as np 11 | 12 | import cv2 13 | 14 | import zendriver_flare_bypasser as zendriver 15 | 16 | XVFB_DISPLAY = None 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class Rect(object): 22 | left: int 23 | top: int 24 | width: int 25 | height: int 26 | 27 | 28 | """ 29 | Trivial wrapper for browser (driver). 30 | Allow to localize driver operations implementation and requirements, 31 | and simplify migration to other driver. 
32 | """ 33 | 34 | 35 | class BrowserWrapper(object): 36 | _zendriver_driver: zendriver.Browser = None 37 | _page = None 38 | 39 | class FakePosition(object): 40 | center = None 41 | 42 | def __init__(self, center): 43 | self.center = tuple(float(x) for x in center) 44 | 45 | class FakeNode(object): 46 | attributes = None 47 | 48 | class FakeElement(zendriver.Element): 49 | _position = None 50 | 51 | def __init__(self, page: zendriver.Tab, center_coords): 52 | super(BrowserWrapper.FakeElement, self).__init__( 53 | BrowserWrapper.FakeNode(), # zendriver.cdp.dom.Node 54 | page # zendriver.Tab 55 | ) 56 | self._position = BrowserWrapper.FakePosition(center_coords) 57 | 58 | def _make_attrs(self): # override for exclude exception on __init__ 59 | pass 60 | 61 | # overrides for call only cdp click send in zendriver.Element.mouse_click 62 | async def get_position(self): 63 | return self._position 64 | 65 | async def flash(self, duration: typing.Union[float, int] = 0.5): 66 | pass 67 | 68 | def __init__(self, zendriver_driver: zendriver.Browser, user_data_dir: str = None): 69 | self._zendriver_driver = zendriver_driver 70 | self._user_data_dir = user_data_dir 71 | 72 | def __del__(self): 73 | if self._user_data_dir: 74 | shutil.rmtree(self._user_data_dir, ignore_errors=True) 75 | 76 | @staticmethod 77 | def start_xvfb_display(): 78 | if sys.platform != 'win32': 79 | global XVFB_DISPLAY 80 | if XVFB_DISPLAY is None: 81 | from xvfbwrapper import Xvfb 82 | XVFB_DISPLAY = Xvfb() 83 | XVFB_DISPLAY.start() 84 | 85 | @staticmethod 86 | async def create(proxy = None, disable_gpu = False): 87 | user_data_dir = os.path.join("/tmp", str(uuid.uuid4())) # < Each created chrome should be isolated. 
88 | BrowserWrapper.start_xvfb_display() 89 | browser_args = [] 90 | if proxy: 91 | browser_args.append("--proxy-server=" + proxy) 92 | if disable_gpu: 93 | browser_args += [ 94 | "--disable-gpu", 95 | "--disable-software-rasterizer" 96 | ] 97 | if sys.platform == 'win32': 98 | browser_args += ["--headless"] 99 | 100 | browser_args += ["--user-data-dir=" + user_data_dir] 101 | browser_args += ["--ignore-certificate-errors", "--ignore-urlfetcher-cert-requests"] 102 | 103 | try: 104 | zendriver_driver = await zendriver.start( 105 | sandbox=False, 106 | browser_args=browser_args 107 | ) 108 | return BrowserWrapper(zendriver_driver, user_data_dir = user_data_dir) 109 | finally: 110 | shutil.rmtree(user_data_dir, ignore_errors=True) 111 | 112 | # Get original driver page impl - can be used only in user command specific implementations 113 | def get_driver(self): 114 | return self._page 115 | 116 | async def size(self): 117 | image = await self.get_screenshot() 118 | image_height, image_width, _ = image.shape 119 | return image_width, image_height 120 | 121 | async def get_outputs(self): 122 | try: 123 | stdout_bytes, stderr_bytes = await self._zendriver_driver.communicate() 124 | return [stdout_bytes, stderr_bytes] 125 | except Exception: 126 | return None 127 | 128 | async def current_url(self): 129 | return self._page.url 130 | 131 | async def close(self): 132 | self._page = None 133 | if self._zendriver_driver: 134 | await self._zendriver_driver.stop() 135 | if self._user_data_dir: 136 | shutil.rmtree(self._user_data_dir, ignore_errors=True) 137 | self._user_data_dir = None 138 | 139 | async def select_text(self, css_selector): 140 | try: 141 | res = await self._page.select(css_selector, timeout=0) 142 | return res.text 143 | except asyncio.TimeoutError: 144 | return None 145 | 146 | async def select_count(self, css_selector): 147 | try: 148 | return len(await self._page.select_all(css_selector, timeout=0)) # Select without waiting. 
149 | except asyncio.TimeoutError: 150 | return 0 151 | 152 | async def get(self, url): 153 | # we work only with one page - close all tabs (excluding first - this close browser) 154 | for tab_i, tab in enumerate(self._zendriver_driver.tabs): 155 | if tab_i > 0: 156 | await tab.close() 157 | self._page = await self._zendriver_driver.get(url) 158 | 159 | async def click(self, css_selector): 160 | try: 161 | element = await self._page.select(css_selector, timeout=0) 162 | except asyncio.TimeoutError: 163 | return False 164 | await element.click() 165 | return True 166 | 167 | async def click_coords(self, coords): 168 | # Specific workaround for zendriver 169 | # click by coordinates without no driver patching. 170 | step = "start" 171 | try: 172 | fake_node = BrowserWrapper.FakeElement(self._page, coords) 173 | step = "mouse_click" 174 | await fake_node.mouse_click() 175 | except Exception as e: 176 | print("EXCEPTION on click_coords '" + step + "': " + str(e)) 177 | raise 178 | 179 | async def mouse_down(self): 180 | try: 181 | await self._page.mouse.down() 182 | except Exception as e: 183 | print("EXCEPTION on mouse_down: " + str(e) + ":\n" + traceback.format_exc()) 184 | raise 185 | 186 | async def mouse_up(self): 187 | try: 188 | await self._page.mouse.up() 189 | except Exception as e: 190 | print("EXCEPTION on mouse_up: " + str(e)) 191 | raise 192 | 193 | async def mouse_move(self, coords): 194 | try: 195 | await self._page.mouse.move(coords[0], coords[1]) 196 | except Exception as e: 197 | print("EXCEPTION on mouse_move: " + str(e)) 198 | raise 199 | 200 | async def get_user_agent(self): 201 | return await self._page.evaluate("window.navigator.userAgent") 202 | 203 | async def get_dom(self): 204 | res_dom = await self._page.get_content() 205 | return (res_dom if res_dom is not None else "") # zendriver return None sometimes (on error) 206 | 207 | async def get_element_screenshot(self, css_selector) -> tuple[np.array, Rect]: 208 | # < Return screenshot as cv2 
image (numpy array) 209 | tmp_file_path = None 210 | 211 | try: 212 | try: 213 | element = await self._page.select(css_selector, timeout=0) 214 | except asyncio.TimeoutError: 215 | return (None, None) 216 | 217 | if element is None: 218 | return (None, None) 219 | 220 | try: 221 | logger.info("To get position for '" + css_selector + "'") 222 | pos = await element.get_position() # abs=True don't works 223 | finally: 224 | logger.info("From get position for '" + css_selector + "'") 225 | rect = Rect() 226 | rect.left = pos.abs_x 227 | rect.top = pos.abs_y 228 | rect.width = pos.width 229 | rect.height = pos.height 230 | 231 | try: 232 | if tmp_file_path is None: 233 | tmp_file_path = os.path.join("/tmp", str(uuid.uuid4()) + ".jpg") 234 | await element.save_screenshot(tmp_file_path) 235 | return cv2.imread(tmp_file_path), rect 236 | except Exception as e: 237 | # return None for: 238 | # "not finished loading yet" 239 | # "Cannot take screenshot with 0 height." - elements isn't loaded yet. 240 | # "Could not find object with given id" - element dissappeared, js on page changed DOM. 
241 | # 242 | msg = str(e).lower() 243 | if ( 244 | "not finished loading yet" not in msg and 245 | "cannot take screenshot " not in msg and 246 | "could not find object" not in msg and 247 | "could not find position" not in msg 248 | ): 249 | raise 250 | return (None, None) 251 | finally: 252 | if tmp_file_path is not None and os.path.exists(tmp_file_path): 253 | os.remove(tmp_file_path) 254 | 255 | async def get_screenshot(self): # Return screenshot as cv2 image (numpy array) 256 | tmp_file_path = None 257 | try: 258 | while True: 259 | try: 260 | tmp_file_path = os.path.join("/tmp", str(uuid.uuid4()) + ".jpg") 261 | await self._page.save_screenshot(tmp_file_path) 262 | return cv2.imread(tmp_file_path) 263 | except zendriver.core.connection.ProtocolException as e: 264 | if "not finished loading yet" not in str(e): 265 | raise 266 | await asyncio.sleep(1) 267 | finally: 268 | if tmp_file_path is not None and os.path.exists(tmp_file_path): 269 | os.remove(tmp_file_path) 270 | 271 | async def save_screenshot(self, image_path): 272 | while True: 273 | try: 274 | await self._page.save_screenshot(image_path) 275 | return 276 | except zendriver.core.connection.ProtocolException as e: 277 | if "not finished loading yet" not in str(e): 278 | raise 279 | await asyncio.sleep(1) 280 | 281 | async def set_cookies(self, cookies: list[dict]): 282 | # convert {"name": "...", "value": "...", ...} to array of http.cookiejar.Cookie 283 | cookie_jar = http.cookiejar.CookieJar() 284 | for c in cookies: 285 | # TO CHECK, that all fields filled correctly. 
async def get_cookies(self) -> list[dict]:
    """Export the browser cookies as plain dictionaries.

    Returns:
        One dict per cookie with the keys ``name``, ``value``, ``port``,
        ``domain``, ``path`` and ``secure`` (in that order).
    """
    # Cookie objects are requested from zendriver in its "requests" format;
    # only the attributes listed below are copied into the result.
    exported_fields = ("name", "value", "port", "domain", "path", "secure")
    browser_cookies = await self._zendriver_driver.cookies.get_all(requests_cookie_format=True)
    return [
        {field: getattr(item, field) for field in exported_fields}
        for item in browser_cookies
    ]
class RemoveContentTypeRequirementMiddleware(object):
    """ASGI middleware that removes the requirement for a Content-Type header.

    Every scope that carries headers gets its ``content-type`` forced to
    ``application/json`` (replaced when present, appended when missing)
    before the wrapped application is called.
    """

    _JSON_HEADER = (b'content-type', b'application/json')

    def __init__(self, app):
        # app: the wrapped ASGI application.
        self._app = app

    async def __call__(self, scope, receive, send):
        """Rewrite the content type (when the scope has headers) and delegate.

        Scopes without a "headers" entry (for example the ASGI "lifespan"
        scope) are passed through untouched instead of failing with
        ``KeyError``.
        """
        headers = scope.get("headers")
        if headers is not None:
            if not isinstance(headers, list):
                # Headers may be any iterable; make it mutable and publish
                # the rewritten list back into the scope.
                headers = list(headers)
                scope["headers"] = headers
            self._force_json_content_type(headers)

        return await self._app(scope, receive, send)

    @staticmethod
    def _force_json_content_type(headers):
        # Replace the first content-type header, or append one if none exists.
        for header_index, header in enumerate(headers):
            if not isinstance(header, tuple) or len(header) != 2:
                # Unexpected headers format - leave everything as is.
                return
            # Compare raw bytes: decoding could fail on non UTF-8 header names.
            if header[0].lower() == b'content-type':
                headers[header_index] = RemoveContentTypeRequirementMiddleware._JSON_HEADER
                return
        headers.append(RemoveContentTypeRequirementMiddleware._JSON_HEADER)
async def process_solve_request(
    url: str,
    yandex_key: str,
    cookies: list[CookieModel] = None,
    max_timeout: int = None,  # in msec.
    proxy: typing.Union[str, ProxyModel] = None,
):
    """Run one solve request and wrap the outcome into a HandleCommandResponse.

    Args:
        url: page url passed to the solver request.
        yandex_key: captcha key passed to the solver request.
        cookies: cookies to send (CookieModel instances or plain dicts).
        max_timeout: processing timeout in milliseconds; when None the
            default of the solver request is kept.
        proxy: proxy url string, or a ProxyModel that is converted to
            "<scheme>://[<user>:<password>@]<host>[:<port>]".

    Returns:
        HandleCommandResponse with status "ok" and a solution, or with
        status "error" and the error text when anything fails (errors are
        reported in the response, not raised).
    """
    start_timestamp = datetime.datetime.timestamp(datetime.datetime.now())

    try:
        # Adapt proxy format for canonical representation.
        # Done inside the try block so that a malformed proxy is reported as
        # an "error" response like every other failure.
        if proxy is not None and not isinstance(proxy, str):
            if proxy.url is not None:
                parsed_proxy = urllib3.util.parse_url(proxy.url)
                if not parsed_proxy.scheme or not parsed_proxy.hostname:
                    raise ValueError("Invalid proxy url: " + str(proxy.url))
                proxy = (
                    parsed_proxy.scheme + "://" +
                    (
                        proxy.username + ":" + (proxy.password if proxy.password else '') + '@'
                        if proxy.username else ''
                    ) +
                    parsed_proxy.hostname +
                    (":" + str(parsed_proxy.port) if parsed_proxy.port else '')
                )
            else:
                proxy = None

        solve_request = yandex_captcha_puzzle_solver.Request()
        solve_request.yandex_key = yandex_key
        solve_request.url = url
        solve_request.cookies = [
            (cookie if isinstance(cookie, dict) else cookie.__dict__)
            for cookie in cookies
        ] if cookies else []
        if max_timeout is not None:
            # msec -> sec; without a value the request keeps its own default.
            solve_request.max_timeout = max_timeout * 1.0 / 1000
        solve_request.proxy = proxy

        # Work on a copy: the per-request debug directory must not leak into
        # the shared module-level solver_args.
        local_solver_args = copy.copy(solver_args)
        if local_solver_args['debug_dir']:
            debug_dir = os.path.join(local_solver_args['debug_dir'], str(uuid.uuid4()))
            pathlib.Path(debug_dir).mkdir(parents=True, exist_ok=True)
            local_solver_args['debug_dir'] = debug_dir
        solver = yandex_captcha_puzzle_solver.Solver(**local_solver_args)
        solve_response = await solver.solve(solve_request)

        return HandleCommandResponse(
            status="ok",
            message=solve_response.message,
            startTimestamp=start_timestamp,
            endTimestamp=datetime.datetime.timestamp(datetime.datetime.now()),
            solution=HandleCommandResponseSolution(
                status="ok",
                url=solve_response.url,
                # Solver returns cookies as dicts (it doesn't know about the rest model);
                # it may also leave them unset.
                cookies=[
                    CookieModel(**cookie) for cookie in (solve_response.cookies or [])
                ],
                user_agent=solve_response.user_agent,
                token=solve_response.token
            )
        )

    except Exception as e:
        # Top-level boundary of the request: log with traceback and answer
        # with an error response instead of propagating.
        logger.exception("Solve request failed: %s", e)
        return HandleCommandResponse(
            status="error",
            message="Error: " + str(e),
            startTimestamp=start_timestamp,
            endTimestamp=datetime.datetime.timestamp(datetime.datetime.now()),
        )
yandex_captcha_puzzle_server:\n" + 236 | " version: " + str(yandex_captcha_puzzle_solver.__version__) + "\n" + 237 | " python version = " + ".".join([str(x) for x in list(sys.version_info)]) + "\n" + 238 | " os = " + " ".join([platform.system(), platform.release(), platform.version()]) + "\n" + 239 | " docker = " + os.environ.get('IN_DOCKER', "false") + "\n" + 240 | " arch = " + str(platform.machine()) + "\n" + 241 | " processor = " + str(platform.processor()) 242 | ) 243 | 244 | parser = argparse.ArgumentParser( 245 | description='Start yandex captcha puzzle solve server.', 246 | epilog='Other arguments will be passed to gunicorn or uvicorn(win32) as is.') 247 | parser.add_argument("-b", "--bind", type=str, default='127.0.0.1:8000') 248 | # < parse for pass to gunicorn as is and as "--host X --port X" to uvicorn 249 | parser.add_argument( 250 | "--proxy-listen-start-port", type=int, default=10000, 251 | help="""Port interval start, that can be used for up local proxies on request processing""" 252 | ) 253 | parser.add_argument( 254 | "--proxy-listen-end-port", type=int, default=20000, 255 | help="""Port interval end for up local proxies""" 256 | ) 257 | parser.add_argument( 258 | "--proxy-command", type=str, 259 | default=None, 260 | help="""command template (jinja2), that will be used for up proxy for process request 261 | with arguments: LOCAL_PORT, UPSTREAM_URL - proxy passed in request""" 262 | ) 263 | parser.add_argument("--disable-gpu", action='store_true') 264 | parser.add_argument("--verbose", action='store_true') 265 | parser.add_argument( 266 | "--debug-dir", type=str, default=None, 267 | help="""directory for save intermediate DOM dumps and screenshots on solving, 268 | for each request will be created unique directory""" 269 | ) 270 | parser.add_argument("--proxy", type=str) 271 | parser.set_defaults(disable_gpu=False, debug=False) 272 | args, unknown_args = parser.parse_known_args() 273 | try: 274 | host, port = args.bind.split(':') 275 | except 
Exception: 276 | print("Invalid 'bind' argument value: " + str(args.bind), file=sys.stderr, flush=True) 277 | sys.exit(1) 278 | 279 | if args.verbose: 280 | logging.getLogger('zendriver.core.browser').setLevel(logging.DEBUG) 281 | logging.getLogger('yandex_captcha_puzzle_solver.yandex_captcha_puzzle_solver').setLevel(logging.DEBUG) 282 | logging.getLogger('uc.connection').setLevel(logging.INFO) 283 | 284 | global solver_args 285 | 286 | if args.debug_dir: 287 | logging.getLogger('yandex_captcha_puzzle_solver.yandex_captcha_puzzle_solver').setLevel(logging.DEBUG) 288 | solver_args['debug_dir'] = args.debug_dir 289 | 290 | sys.argv = [re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])] 291 | sys.argv += unknown_args 292 | 293 | solver_args['proxy'] = args.proxy 294 | 295 | # Init ProxyController 296 | solver_args['proxy_controller'] = yandex_captcha_puzzle_solver.proxy_controller.ProxyController( 297 | start_port=args.proxy_listen_start_port, 298 | end_port=args.proxy_listen_end_port, 299 | command=args.proxy_command) 300 | 301 | if args.disable_gpu: 302 | solver_args['disable_gpu'] = True 303 | 304 | if USE_GUNICORN: 305 | sys.argv += ['-b', args.bind] 306 | sys.argv += ['--worker-class', 'uvicorn.workers.UvicornWorker'] 307 | sys.argv += ['yandex_captcha_puzzle_solver:server'] 308 | sys.exit(gunicorn.app.wsgiapp.run()) 309 | else: 310 | sys.argv += ['--host', host] 311 | sys.argv += ['--port', port] 312 | sys.argv += ['yandex_captcha_puzzle_solver:server'] 313 | sys.exit(uvicorn.main.main()) 314 | 315 | except Exception as e: 316 | logging.error(str(e)) 317 | sys.exit(1) 318 | 319 | 320 | if __name__ == '__main__': 321 | server_run() 322 | -------------------------------------------------------------------------------- /src/yandex_captcha_puzzle_solver/image_processor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import typing 4 | import enum 5 | import collections 6 | import math 7 | import 
class ImageProcessor(object):
    """Static OpenCV/NumPy helpers used to analyse captcha screenshots.

    Every entry point unpacks ``image.shape`` into three values, so images
    are expected to be (height, width, channels) arrays.
    """

    class JointSegment(object):
        """Joint representation: a straight segment described by its start
        point (x, y), its length and its orientation."""

        class Type(enum.Enum):
            HORIZONTAL = 1
            VERTICAL = 2

        start_point: typing.Tuple[int, int]
        length: int
        type: Type

        def __init__(
            self,
            start_point: typing.Tuple[int, int] = None,
            length: int = None,
            type: Type = None
        ):
            self.start_point = start_point
            self.length = length
            self.type = type

    @staticmethod
    def _color_mask(image, color, delta):
        # Mask of pixels whose three channels are within +-delta of `color`
        # (bounds clamped to 0..255).
        lower = tuple(max(channel - delta, 0) for channel in color[:3])
        upper = tuple(min(channel + delta, 255) for channel in color[:3])
        return cv2.inRange(image, lower, upper)

    @staticmethod
    def _joint_end_point(joint) -> typing.Tuple[int, int]:
        # End of the segment: start point shifted by `length` along the joint axis.
        horizontal = joint.type == ImageProcessor.JointSegment.Type.HORIZONTAL
        vertical = joint.type == ImageProcessor.JointSegment.Type.VERTICAL
        return (
            joint.start_point[0] + (joint.length if horizontal else 0),
            joint.start_point[1] + (joint.length if vertical else 0)
        )

    @staticmethod
    def _point_on_joint(joint, i, evaluate_points) -> typing.Tuple[int, int]:
        # i-th of `evaluate_points` sample points, counted from the start point.
        x, y = joint.start_point[0], joint.start_point[1]
        if joint.type == ImageProcessor.JointSegment.Type.HORIZONTAL:
            x += int(joint.length * i / evaluate_points)
        elif joint.type == ImageProcessor.JointSegment.Type.VERTICAL:
            y += int(joint.length * i / evaluate_points)
        return (x, y)

    # Get rect of modal window (that contains challenge).
    @staticmethod
    def get_modal_frame_rect(
        image, color = (255, 255, 255),
        save_steps_dir: str = None,
        logger = None
    ):
        """Return (x, y, width, height) of the largest `color` area, or None.

        The bounding box of the biggest external contour of the color mask
        is shrunk by 5 pixels on every side. When `save_steps_dir` is set
        the mask ('mask.png') and the found rect ('rect.png') are written
        there. `logger` is accepted but not used.
        """
        image_height, image_width, _ = image.shape  # also rejects non 3-dimensional input
        mask = ImageProcessor._color_mask(image, color, 20)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

        contours, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        res_box = None
        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
            if res_box is None or w * h > res_box[2] * res_box[3]:
                res_box = (x, y, w, h)

        if res_box is not None:
            compress_width = 10
            compress_height = 10
            res_box = (
                res_box[0] + int(compress_width / 2),
                res_box[1] + int(compress_height / 2),
                res_box[2] - compress_width,
                res_box[3] - compress_height
            )
            # Debug drawing needs a box, so it stays under the None check.
            if save_steps_dir:
                debug_image = image.copy()
                debug_image = cv2.rectangle(
                    debug_image,
                    (res_box[0], res_box[1]),
                    (res_box[0] + res_box[2], res_box[1] + res_box[3]),
                    (0, 0, 255),
                    2
                )
                cv2.imwrite(os.path.join(save_steps_dir, 'rect.png'), debug_image)

        return res_box

    # Get slider points (for drag from to)
    @staticmethod
    def get_drag_points(image, logger = None, save_steps_dir: str = None, log_prefix = ''):
        """Return [down_point, up_point] for dragging the slider, or None.

        The slider is located by color; the mask is dilated and then eroded.
        `down_point` is a random pixel (x, y) of the resulting mask,
        `up_point` is another random mask pixel with its x mirrored against
        the image width. Returns None when no mask pixel remains.
        `logger` and `log_prefix` are accepted but not used.
        """
        image_height, image_width, _ = image.shape
        slider_color = (255, 130, 82)  # < BGR color of slider (OpenCV channel order).
        mask = ImageProcessor._color_mask(image, slider_color, 50)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

        broad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        mask = cv2.dilate(mask, broad_kernel, iterations = 1)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'dilated_mask.png'), mask)

        erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
        mask = cv2.erode(mask, erode_kernel, iterations = 1)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'eroded_mask.png'), mask)

        # np.where yields (row indices, column indices), i.e. (y values, x values).
        rows, cols = np.where(mask >= 255)
        if len(rows) == 0:
            return None

        # Points are converted to plain ints so they are ordinary Python
        # values for drawing and for callers, not NumPy scalars.
        down_point_pos = random.randint(0, len(rows) - 1)
        down_point = (int(cols[down_point_pos]), int(rows[down_point_pos]))
        up_point_pos = random.randint(0, len(rows) - 1)
        up_point = (int(image_width - cols[up_point_pos]), int(rows[up_point_pos]))

        if save_steps_dir:
            debug_image = image.copy()
            debug_image = cv2.circle(debug_image, down_point, 5, (0, 0, 255), 2)
            debug_image = cv2.circle(debug_image, up_point, 5, (0, 0, 255), 2)
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_points.png'), debug_image)

        return [down_point, up_point]

    # Get puzzle joints for evaluate diff
    @staticmethod
    def get_puzzle_joints(
        image, logger = None, save_steps_dir: str = None, log_prefix = ''
    ) -> typing.List[typing.Tuple[JointSegment, JointSegment]]:
        """Return pairs of segments lying on both sides of each puzzle joint.

        Separators are detected with `_determine_separators`; an empty list
        is returned when they cannot be determined. For every cell a
        horizontal joint is added unless the cell is in the last row, and a
        vertical joint unless it is in the last column. When
        `save_steps_dir` is set, 'image_with_joints.png' is written there.
        `logger` and `log_prefix` are accepted but not used.
        """
        puzzle_vertical_separators, puzzle_horizontal_separators = ImageProcessor._determine_separators(
            image, save_steps_dir = save_steps_dir
        )

        if puzzle_vertical_separators is None or puzzle_horizontal_separators is None:
            return []

        horizontal_type = ImageProcessor.JointSegment.Type.HORIZONTAL
        vertical_type = ImageProcessor.JointSegment.Type.VERTICAL
        # Offsets from the separator edges (loop invariant).
        vertical_indentation = 2
        horizontal_indentation = 2
        last_h_index = len(puzzle_horizontal_separators) - 1
        last_v_index = len(puzzle_vertical_separators) - 1

        # construct joints
        res_joints: typing.List[typing.Tuple[ImageProcessor.JointSegment, ImageProcessor.JointSegment]] = []
        # h and v are pairs of neighbour separators, each separator is [first line, last line]
        for h_index, h in enumerate(puzzle_horizontal_separators):
            for v_index, v in enumerate(puzzle_vertical_separators):
                if h_index < last_h_index:
                    # add horizontal joint: runs along x, so its length is
                    # reduced by the horizontal indentation on both ends.
                    j_len = v[1][0] - v[0][1] - 2 * horizontal_indentation
                    res_joints.append(
                        (
                            ImageProcessor.JointSegment(
                                start_point=(v[0][1] + horizontal_indentation, h[1][0] - vertical_indentation),
                                length=j_len,
                                type=horizontal_type
                            ),
                            ImageProcessor.JointSegment(
                                start_point=(v[0][1] + horizontal_indentation, h[1][1] + vertical_indentation),
                                length=j_len,
                                type=horizontal_type
                            ),
                        )
                    )
                if v_index < last_v_index:
                    # add vertical joint: runs along y, so its length is
                    # reduced by the vertical indentation on both ends.
                    j_len = h[1][0] - h[0][1] - 2 * vertical_indentation
                    res_joints.append(
                        (
                            ImageProcessor.JointSegment(
                                start_point=(v[1][0] - horizontal_indentation, h[0][1] + vertical_indentation),
                                length=j_len,
                                type=vertical_type
                            ),
                            ImageProcessor.JointSegment(
                                start_point=(v[1][1] + horizontal_indentation, h[0][1] + vertical_indentation),
                                length=j_len,
                                type=vertical_type
                            ),
                        )
                    )

        # draw joints
        if save_steps_dir:
            debug_image = image.copy()
            for joint_from, joint_to in res_joints:
                for joint in (joint_from, joint_to):
                    cv2.line(
                        debug_image,
                        joint.start_point,
                        ImageProcessor._joint_end_point(joint),
                        (0, 0, 255),
                        1
                    )
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_joints.png'), debug_image)

        return res_joints

    @staticmethod
    def evaluate_joints_diff(
        image,
        joints: typing.List[typing.Tuple[JointSegment, JointSegment]],
        evaluate_points = 10
    ) -> float:
        """Return the average color distance across the given joints.

        For every pair of segments `evaluate_points` points are sampled on
        each segment, and the euclidean distance between the first three
        channels of the corresponding pixels is averaged over all samples.
        Returns 0 when nothing was sampled.
        """
        color_diff_sum = 0
        color_diff_count = 0
        for joint_from, joint_to in joints:
            for i in range(evaluate_points):
                point1 = ImageProcessor._point_on_joint(joint_from, i, evaluate_points)
                point2 = ImageProcessor._point_on_joint(joint_to, i, evaluate_points)
                # Points are (x, y) while the image is indexed [row, column].
                c1 = image[point1[1], point1[0]]
                c2 = image[point2[1], point2[0]]
                color_diff_sum += math.sqrt(
                    (float(c2[0]) - float(c1[0]))**2 +
                    (float(c2[1]) - float(c1[1]))**2 +
                    (float(c2[2]) - float(c1[2]))**2
                )
                color_diff_count += 1
        return color_diff_sum / color_diff_count if color_diff_count > 0 else 0

    @staticmethod
    def _lines_to_intervals(lines, sum_threshold) -> typing.List[typing.List[int]]:
        """Return closed intervals [first index, last index] of consecutive
        lines whose value is >= `sum_threshold`."""
        last_interval_start = None
        intervals = []

        for i, line_sum in enumerate(lines):
            if line_sum >= sum_threshold:
                if last_interval_start is None:
                    last_interval_start = i
            elif last_interval_start is not None:
                # close interval
                intervals.append([last_interval_start, i - 1])
                last_interval_start = None

        if last_interval_start is not None:
            intervals.append([last_interval_start, len(lines) - 1])

        return intervals

    @staticmethod
    def group_values(arr, radius):
        """Group sorted values: a value joins the current group while it is
        closer than `radius` to the FIRST value of that group.

        Returns a list of groups (lists); an empty input gives an empty list.
        """
        d = sorted(arr)
        if not d:
            return []
        m = [[d[0]]]
        for x in d[1:]:
            if x - m[-1][0] < radius:
                m[-1].append(x)
            else:
                m.append([x])
        return m

    @staticmethod
    def _get_separators_group(
        separators,
        min_len,
        max_len
    ) -> typing.Optional[
        typing.List[
            typing.Tuple[
                typing.List[int],  # left/up separator
                typing.List[int]   # right/down separator
            ]
        ]
    ]:
        """Pick pairs of neighbour separators that enclose similar-sized blocks.

        Neighbour pairs are bucketed by the gap between them, gaps closer
        than 4 are grouped, and the first group whose average gap is in
        [min_len, max_len) and that holds 3..7 pairs is returned (sorted by
        the first separator). Returns None when no group qualifies.
        """
        block_heights = collections.OrderedDict()
        for prev_sep, cur_sep in zip(separators, separators[1:]):
            block_height = cur_sep[0] - prev_sep[1]
            block_heights.setdefault(block_height, []).append((prev_sep, cur_sep))

        for height_group in ImageProcessor.group_values(block_heights.keys(), 4):
            avg_height = int(sum(height_group) / len(height_group))
            if int(min_len) <= avg_height < int(max_len):
                # check number of separators
                res_separators = []
                for h in height_group:
                    res_separators += block_heights[h]
                res_separators = sorted(res_separators, key=lambda sep: sep[0])
                if 3 <= len(res_separators) <= 7:
                    # found puzzle groups
                    return res_separators

        return None

    @staticmethod
    def _determine_separators(
        image, white_percent = 0.94,
        lower_color = (210, 210, 210), upper_color = (256, 256, 256),
        save_steps_dir = None
    ):
        """Return (vertical separators, horizontal separators) of the puzzle grid.

        A row/column counts as separator line when at least `white_percent`
        of its pixels fall into [lower_color, upper_color]. Both results are
        in the `_get_separators_group` format; (None, None) is returned when
        either direction has no suitable group. When `save_steps_dir` is set,
        'image_with_sep.png' is written there.
        """
        h, w, _ = image.shape
        mask = cv2.inRange(image, lower_color, upper_color)
        mask = mask / 255  # 0/255 mask -> 0/1 so that sums count pixels

        # determine horizontal separators
        horizontal_lines = np.sum(mask, axis = 1)
        horizontal_separators = ImageProcessor._lines_to_intervals(horizontal_lines, w * white_percent)
        # filter horizontal separators
        puzzle_horizontal_separators = ImageProcessor._get_separators_group(
            horizontal_separators, h / 20, h / 2
        )

        # determine vertical separators
        vertical_lines = np.sum(mask, axis = 0)
        vertical_separators = ImageProcessor._lines_to_intervals(vertical_lines, h * white_percent)
        # filter vertical separators
        puzzle_vertical_separators = ImageProcessor._get_separators_group(
            vertical_separators, w / 20, w / 2
        )

        if puzzle_horizontal_separators is None or puzzle_vertical_separators is None:
            return None, None

        if save_steps_dir:
            debug_image = image.copy()
            for h_el in puzzle_horizontal_separators:
                cv2.line(debug_image, (0, h_el[0][1]), (w, h_el[0][1]), (255, 0, 0), 1)
                cv2.line(debug_image, (0, h_el[1][0]), (w, h_el[1][0]), (255, 0, 0), 1)
            for v_el in puzzle_vertical_separators:
                cv2.line(debug_image, (v_el[0][1], 0), (v_el[0][1], h), (0, 0, 255), 1)
                cv2.line(debug_image, (v_el[1][0], 0), (v_el[1][0], h), (0, 0, 255), 1)
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_sep.png'), debug_image)

        return puzzle_vertical_separators, puzzle_horizontal_separators
class Request(object):
    """Request for process.

    Can be extended: every key of the dict passed to the constructor
    becomes an instance attribute, so custom fields can be carried along.
    Fields that were not set fall back to the class-level defaults below.
    """
    url: typing.Optional[str] = None
    yandex_key: typing.Optional[str] = None
    # Proxy url string (the REST layer assigns a canonical url string here).
    proxy: typing.Optional[str] = None
    max_timeout: float = 60  # timeout in sec
    # List of cookie dicts ({"name": ..., "value": ..., ...}).
    cookies: typing.Optional[typing.List[dict]] = None

    def __init__(self, _dict=None):
        # _dict: optional mapping of attribute name -> value.
        if _dict:
            self.__dict__.update(_dict)

    def __str__(self):
        # Only explicitly set attributes are shown (class defaults are not
        # part of the instance dict).
        return str(self.__dict__)
mark_coords: 108 | image = cv2.circle(image, mark_c, 5, (255, 0, 0), 2) 109 | if mark_rect: 110 | image = cv2.rectangle( 111 | image, 112 | (mark_rect[0], mark_rect[1]), 113 | (mark_rect[2], mark_rect[3]), 114 | (255, 0, 0), 115 | 2 # < thickness 116 | ) 117 | if mark_joints: 118 | for joint_from, joint_to in mark_joints: 119 | image = cv2.line( 120 | image, 121 | joint_from.start_point, 122 | ( 123 | joint_from.start_point[0] + ( 124 | joint_from.length if joint_from.type == ImageProcessor.JointSegment.Type.HORIZONTAL else 0 125 | ), 126 | joint_from.start_point[1] + ( 127 | joint_from.length if joint_from.type == ImageProcessor.JointSegment.Type.VERTICAL else 0 128 | ) 129 | ), 130 | (0, 0, 255), 131 | 1 132 | ) 133 | image = cv2.line( 134 | image, 135 | joint_to.start_point, 136 | ( 137 | joint_to.start_point[0] + ( 138 | joint_to.length if joint_to.type == ImageProcessor.JointSegment.Type.HORIZONTAL else 0 139 | ), 140 | joint_to.start_point[1] + ( 141 | joint_to.length if joint_to.type == ImageProcessor.JointSegment.Type.VERTICAL else 0 142 | ) 143 | ), 144 | (0, 0, 255), 145 | 1 146 | ) 147 | cv2.imwrite(screenshot_file_without_ext + "_mark.jpg", image) 148 | 149 | dom = await self._driver.get_dom() 150 | with open(screenshot_file_without_ext + '.html', 'w') as fp: 151 | fp.write(dom) 152 | self._screenshot_i += 1 153 | 154 | logger.debug("Screenshot saved to '" + screenshot_file_without_ext + "'") 155 | 156 | async def solve(self, req: Request) -> Response: 157 | # do some validations 158 | if req.url is None: 159 | raise Exception("Parameter 'url' should be defined.") 160 | 161 | try: 162 | logger.info("Solve request: " + str(req)) 163 | res = await asyncio.wait_for(self._resolve_challenge(req), req.max_timeout) 164 | logger.info("Solve result: " + str(res)) 165 | except asyncio.TimeoutError: 166 | raise Exception("Processing timeout (max_timeout=" + str(req.max_timeout) + ")") 167 | return res 168 | 169 | async def _resolve_challenge(self, req: Request) -> 
Response: 170 | start_time: datetime.datetime = datetime.datetime.now() 171 | step = 'start' 172 | try: 173 | # Use default upped proxy 174 | use_proxy: str = self._proxy 175 | proxy_holder = None 176 | 177 | step = 'proxy init' 178 | if req.proxy: 179 | # Up proxy with specific end proxy (for yandex requests) 180 | if not self._proxy_controller: 181 | raise Solver.Exception("For use proxy with authorization you should pass proxy_controller into c-tor") 182 | proxy_holder = self._proxy_controller.get_proxy(use_proxy) 183 | use_proxy = "socks5://127.0.0.1:" + str(proxy_holder.local_port()) 184 | else: 185 | proxy_holder = contextlib.nullcontext() 186 | 187 | with proxy_holder: 188 | try: 189 | step = 'browser init' 190 | self._driver: BrowserWrapper = await BrowserWrapper.create( 191 | use_proxy, disable_gpu = self._disable_gpu 192 | ) 193 | logger.info( 194 | 'New instance of webdriver has been created to perform the request (proxy=' + 195 | str(use_proxy) + '), timeout=' + str(req.max_timeout)) 196 | return await self._resolve_challenge_impl(req, start_time) 197 | finally: 198 | logger.info('Close webdriver') 199 | if self._driver is not None: 200 | await self._driver.close() 201 | logger.debug('A used instance of webdriver has been destroyed') 202 | if logger.isEnabledFor(logging.DEBUG) and self._driver is not None: 203 | # Read outputs only after driver close (when process stopped), 204 | # otherwise output reading can be blocked. 205 | outputs = await self._driver.get_outputs() 206 | if outputs: 207 | for output_i, output in enumerate(outputs): 208 | logger.debug( 209 | "Webdriver output #" + str(output_i) + ":" + 210 | "\n---------------------------------------\n" + 211 | str(output.decode("utf-8")) + 212 | "\n---------------------------------------\n" 213 | ) 214 | self._driver = None 215 | except Solver.Exception as e: 216 | error_message = ( 217 | "Error solving the challenge. 
async def _check_challenge(self) -> bool:
    """Return True if any selector of ``YANDEX_CAPTCHA_SELECTORS`` matches the page."""
    for selector in YANDEX_CAPTCHA_SELECTORS:
        if await self._driver.select_count(selector) > 0:
            return True
    return False


async def _wait_screenshot(self, css_selector) -> typing.Tuple[typing.Any, typing.Tuple[int, int, int, int]]:
    """Poll once per second until a screenshot of *css_selector* is available.

    Args:
        css_selector: selector passed to the driver's
            ``get_element_screenshot``.

    Returns:
        The ``(image, rect)`` pair produced by the driver.
        NOTE(review): the caller in this file reads ``rect.left`` and
        ``rect.top``, so ``rect`` is probably not a plain tuple - confirm
        against BrowserWrapper.
    """
    while True:
        # Honour the requested selector (it used to be ignored in favour of
        # a hard-coded 'div[class="smart-captcha"]').
        element_image, rect = await self._driver.get_element_screenshot(css_selector)
        if element_image is not None:
            return (element_image, rect)
        # Keep a debugging trace of the page while the element is missing.
        await self.save_screenshot('element_screenshot_step')
        await asyncio.sleep(1)


async def _challenge_wait_and_click_loop(self) -> str:
    """Drive the captcha until the token can be read, then return it.

    Every iteration inspects the page:

    * checkbox iframe present: the slider is located on a screenshot of the
      captcha frame. If its handle is in the left half it is dragged to the
      mirrored position; otherwise the captcha is treated as solved, the
      form is submitted and the text of ``#smart_token`` is returned.
    * advanced iframe present: the puzzle modal is solved by holding the
      modal slider and repeatedly narrowing the interval around the slider
      position with the smallest joints difference.

    The loop has no timeout of its own; it only ends by returning the
    token or by an exception.
    """
    attempt = 0
    # NOTE(review): the returned size is not used below; the call is kept
    # unchanged because BrowserWrapper.size() is not visible from here.
    width, height = await self._driver.size()

    while True:
        logger.info("Challenge step #" + str(attempt))
        await self.save_screenshot('attempt')

        # Check state of captcha.
        checkbox = await self._driver.select_count('iframe[src*="smartcaptcha.yandexcloud.net/checkbox"]')

        if checkbox > 0:
            # Checkbox state (slider), can be in two states: need to solve, solved.
            captcha_frame_image, captcha_frame_rect = await self._wait_screenshot('div[class="smart-captcha"]')
            _, image_width, _ = captcha_frame_image.shape

            logger.info("Drag slider")
            await self.save_screenshot('attempt_to_move_slider')
            # scroller in form
            down_and_up_points = Solver._get_drag_points(captcha_frame_image)
            if down_and_up_points is not None:
                down_point = down_and_up_points[0]
                if down_point[0] < image_width / 2:
                    # Need to solve checkbox
                    up_point = down_and_up_points[1]
                    logger.info("To move slider from " + str(down_point) + " to " + str(up_point))
                    await self.save_screenshot('attempt_move_down_point', mark_coords=[down_point, up_point])
                    # Points found on the frame screenshot are relative to the
                    # frame: shift them by the frame position before using
                    # them as mouse coordinates.
                    abs_down_point = (captcha_frame_rect.left + down_point[0], captcha_frame_rect.top + down_point[1])
                    abs_up_point = (captcha_frame_rect.left + up_point[0], captcha_frame_rect.top + up_point[1])
                    # Approach the handle from a nearby random point, then
                    # press exactly where the drag path starts (the press
                    # used to happen at the frame-relative point).
                    premove_point = (
                        abs_down_point[0] - random.randint(5, 10),
                        abs_down_point[1] - random.randint(5, 10)
                    )
                    await self._driver.mouse_move(premove_point)
                    await self._driver.mouse_move(abs_down_point)
                    await self._driver.mouse_down()
                    await self.save_screenshot('attempt_move_down', mark_coords=[down_point])
                    # Drag in small linear steps instead of one jump.
                    steps = 20
                    for i in range(1, steps + 1):
                        x = round(abs_down_point[0] + (abs_up_point[0] - abs_down_point[0]) * (i / steps))
                        y = round(abs_down_point[1] + (abs_up_point[1] - abs_down_point[1]) * (i / steps))
                        await self._driver.mouse_move((x, y))
                    await self._driver.mouse_up()
                    await self.save_screenshot('attempt_from_move_slider_after_up')
                    await asyncio.sleep(1)  # Wait advanced challenge loading.
                else:
                    # Solved (slider in right part) - send form
                    await self._driver.click("#submit_captcha_button")
                    await asyncio.sleep(1)  # Wait form sending.
                    # get token as text
                    res = await self._driver.select_text('#smart_token')
                    return res

        advanced = await self._driver.select_count('iframe[src*="smartcaptcha.yandexcloud.net/advanced"]')
        if advanced > 0:
            # modal mode - find puzzle position
            full_image = await self._driver.get_screenshot()
            # get internal slider position in modal
            logger.info("Solve modal window")
            await self.save_screenshot("modal_solve_start")
            # modal_rect is used as (x, y, width, height) by the slicing below.
            modal_rect = ImageProcessor.get_modal_frame_rect(full_image)
            modal_image = full_image[
                modal_rect[1]:modal_rect[1] + modal_rect[3], modal_rect[0]:modal_rect[0] + modal_rect[2]
            ]
            # The slider is searched in the lower 2/5 of the modal.
            find_slider_rect = (
                0,
                int(modal_rect[3] * 3 / 5),
                modal_rect[2],
                int(modal_rect[3] * 2 / 5),
            )
            slider_image = modal_image[
                find_slider_rect[1]:find_slider_rect[1] + find_slider_rect[3],
                find_slider_rect[0]:find_slider_rect[0] + find_slider_rect[2]
            ]
            down_and_up_points = ImageProcessor.get_drag_points(slider_image)

            if down_and_up_points is not None:
                assert down_and_up_points[0][0] < down_and_up_points[1][0]
                # offset down_and_up_points relative full image
                down_and_up_points = (
                    (
                        down_and_up_points[0][0] + modal_rect[0] + find_slider_rect[0],
                        down_and_up_points[0][1] + modal_rect[1] + find_slider_rect[1]
                    ),
                    (
                        down_and_up_points[1][0] + modal_rect[0] + find_slider_rect[0],
                        down_and_up_points[1][1] + modal_rect[1] + find_slider_rect[1]
                    )
                )
                await self.save_screenshot("modal_slider_pos", mark_coords=down_and_up_points)

                # get joints in modal (searched in the upper 3/5 of it)
                find_puzzle_rect = (
                    0,
                    0,
                    modal_rect[2],
                    int(modal_rect[3] * 3 / 5),
                )
                # The rect starts at (0, 0), so its width/height double as
                # the end indexes of the slice.
                find_puzzle_image = modal_image[
                    find_puzzle_rect[1]:find_puzzle_rect[3],
                    find_puzzle_rect[0]:find_puzzle_rect[2]
                ]
                logger.debug("To save modal_find_puzzle")
                await self.save_screenshot("modal_find_puzzle", image=find_puzzle_image)
                logger.debug("From save modal_find_puzzle")
                joints = ImageProcessor.get_puzzle_joints(find_puzzle_image)
                # offset joints relative full image
                for joint1, joint2 in joints:
                    joint1.start_point = (
                        joint1.start_point[0] + modal_rect[0] + find_puzzle_rect[0],
                        joint1.start_point[1] + modal_rect[1] + find_puzzle_rect[1]
                    )
                    joint2.start_point = (
                        joint2.start_point[0] + modal_rect[0] + find_puzzle_rect[0],
                        joint2.start_point[1] + modal_rect[1] + find_puzzle_rect[1]
                    )

                logger.debug("Modal solving started with " + str(len(joints)) + " joints")
                await self.save_screenshot("modal_start", mark_joints=joints)
                start_point = down_and_up_points[0]
                await self._driver.mouse_move(start_point)
                await self._driver.mouse_down()
                slider_steps = 20
                # Initial values only guarantee that the search loop is entered.
                min_diff = 0
                max_diff = 10000000
                end_point = down_and_up_points[1]
                depth_step_i = 0
                select_pos = None
                # Each pass samples the interval [start_point, end_point] and
                # narrows it to the neighbours of the best sample, until the
                # samples are indistinguishable or the interval is tiny.
                while abs(max_diff - min_diff) > 0.1 and abs(end_point[0] - start_point[0]) > 2:
                    min_diff = 10000000
                    max_diff = 0
                    new_start_point = None  # < Positions around min diff point
                    new_end_point = None

                    check_points: typing.List[typing.Tuple[int, int]] = []
                    if end_point[0] - start_point[0] > slider_steps:
                        for i in range(slider_steps + 1):
                            check_points.append(Solver._middle_point(start_point, end_point, float(i) / slider_steps))
                    else:
                        # Narrow interval: check every pixel column.
                        for i in range(start_point[0], end_point[0] + 1):
                            check_points.append((i, start_point[1]))

                    for check_point_i, middle_point in enumerate(check_points):
                        await self._driver.mouse_move(middle_point)
                        full_image = await self._driver.get_screenshot()
                        joints_diff = ImageProcessor.evaluate_joints_diff(full_image, joints)
                        # Name the screenshot after the index of the checked
                        # point (the stale index of the loops above was used
                        # before, giving every sample the same number).
                        await self.save_screenshot(
                            "modal_slider_pos_" + str(check_point_i) + "_" + str(joints_diff), mark_joints=joints
                        )
                        if new_start_point is None or joints_diff < min_diff:
                            min_diff = joints_diff
                            new_start_point = check_points[check_point_i - 1] if check_point_i > 0 else middle_point
                            new_end_point = (
                                check_points[check_point_i + 1]
                                if check_point_i < len(check_points) - 1 else middle_point
                            )
                            select_pos = middle_point
                        max_diff = max(max_diff, joints_diff)

                    start_point = new_start_point
                    end_point = new_end_point
                    logger.debug(
                        "Puzzle solve step " + str(depth_step_i) + ", min_diff = " + str(min_diff) +
                        ", max_diff = " + str(max_diff) +
                        ", start_point = " + str(start_point) +
                        ", end_point = " + str(end_point) +
                        ", select_pos = " + str(select_pos) +
                        ", check_points = " + str(check_points)
                    )
                    depth_step_i += 1

                # select_pos stays None when the search loop was never
                # entered (slider interval of 2 pixels or less): release the
                # button where it was pressed instead of moving to None.
                if select_pos is not None:
                    await self._driver.mouse_move(select_pos)
                await self.save_screenshot("modal_final_slider_pos")
                await self._driver.mouse_up()
                await asyncio.sleep(1)
                await self.save_screenshot('modal_final')

        attempt = attempt + 1
        await asyncio.sleep(_SHORT_TIMEOUT)
@staticmethod
def _middle_point(start_point: typing.Tuple[int, int], end_point: typing.Tuple[int, int], coef: float):
    """Return the integer point at fraction *coef* between two points.

    ``coef=0`` gives *start_point* and ``coef=1`` gives *end_point*; each
    coordinate is interpolated linearly and rounded with ``round()``.
    """
    def interpolate(begin, end):
        return int(round(begin + float(end - begin) * coef))

    return (
        interpolate(start_point[0], end_point[0]),
        interpolate(start_point[1], end_point[1])
    )


@staticmethod
def _get_dominant_color(image):
    """Return the most frequent color of *image* as a 3-tuple of channel values.

    The pixels are flattened, every color is encoded as one index of a
    256 x 256 x 256 space, the indexes are counted and the most frequent
    one is decoded back into its channels.
    """
    pixels = image.reshape(-1, image.shape[-1])
    color_space = (256, 256, 256)  # generically: pixels.max(0) + 1
    color_codes = np.ravel_multi_index(pixels.T, color_space)
    most_frequent_code = np.bincount(color_codes).argmax()
    return np.unravel_index(most_frequent_code, color_space)


@staticmethod
def _get_drag_points(image, logger = None, save_steps_dir: str = None, log_prefix = ''):
    """Find where to press and where to release the slider on *image*.

    Pixels close to the slider color are selected, the mask is dilated and
    then eroded with a larger kernel so that only pixels well inside a
    large matching area remain, and two random surviving pixels are picked.

    Args:
        image: image with shape ``(height, width, channels)``.
        logger: currently unused.
        save_steps_dir: when set, intermediate masks and the annotated
            image are written into this directory.
        log_prefix: currently unused.

    Returns:
        ``[down_point, up_point]`` as ``(x, y)`` tuples of plain ``int``,
        where ``up_point`` has its x coordinate mirrored relative to the
        image width, or ``None`` if no slider pixels were found.
    """
    _, image_width, _ = image.shape
    # presumably BGR, OpenCV's usual channel order (comment used to say "GBR")
    slider_color = (255, 130, 82)
    slider_color_delta = 50
    lower_bound = tuple(max(channel - slider_color_delta, 0) for channel in slider_color)
    upper_bound = tuple(min(channel + slider_color_delta, 255) for channel in slider_color)
    mask = cv2.inRange(image, lower_bound, upper_bound)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

    broad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    mask = cv2.dilate(mask, broad_kernel, iterations = 1)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'dilated_mask.png'), mask)

    erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
    mask = cv2.erode(mask, erode_kernel, iterations = 1)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'eroded_mask.png'), mask)

    # np.where yields row (y) indexes first, then column (x) indexes.
    rows, cols = np.where(mask >= 255)
    if len(rows) == 0:
        return None

    # Convert NumPy scalars to plain ints so that callers (mouse moves,
    # logging, OpenCV drawing) get ordinary Python coordinates.
    down_point_pos = random.randint(0, len(rows) - 1)
    down_point = (int(cols[down_point_pos]), int(rows[down_point_pos]))
    up_point_pos = random.randint(0, len(rows) - 1)
    up_point = (int(image_width - cols[up_point_pos]), int(rows[up_point_pos]))

    if save_steps_dir:
        debug_image = image.copy()
        debug_image = cv2.circle(debug_image, down_point, 5, (0, 0, 255), 2)
        debug_image = cv2.circle(debug_image, up_point, 5, (0, 0, 255), 2)
        cv2.imwrite(os.path.join(save_steps_dir, 'image_with_points.png'), debug_image)

    return [down_point, up_point]
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
# Point both environment variables at certifi's CA bundle (part of the SSL
# certificate fix for compiled binaries).
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
os.environ["SSL_CERT_FILE"] = certifi.where()

if __name__ == '__main__':
    # Manual smoke run: solve the captcha of one hard-coded page.
    sys.stdout.reconfigure(encoding="utf-8")
    # basicConfig() and StreamHandler belong to the logging module; the
    # module-level `logger` object does not provide them.
    logging.basicConfig(
        format='%(asctime)s [%(name)s] [%(levelname)s]: %(message)s',
        handlers=[logging.StreamHandler(sys.stdout)],
        level=logging.INFO)

    req = Request()
    req.url = 'https://knopka.ashoo.id'

    solver = Solver()
    # solve() is a coroutine function: it has to be driven by an event loop,
    # calling it without awaiting only creates a coroutine object.
    res = asyncio.run(solver.solve(req))