├── docker
├── requirements.txt
└── rootfs
│ └── opt
│ └── yandex_captcha_puzzle_solver
│ ├── etc
│ └── html_templates
│ │ ├── form.html.j2
│ │ └── index.html.j2
│ └── bin
│ ├── YandexCaptchaPuzzleSolverProxyRun.sh
│ └── YandexCaptchaPuzzleSolverRun.sh
├── tests
└── empty_test.py
├── src
├── yandex_captcha_puzzle_solver
│ ├── __init__.py
│ ├── proxy_controller.py
│ ├── browser_wrapper.py
│ ├── yandex_captcha_puzzle_solve_server.py
│ ├── image_processor.py
│ └── yandex_captcha_puzzle_solver.py
├── mitm_addons
│ ├── mitm_grounding_addon.py
│ └── mitm_split_addon.py
└── grounding_server
│ └── grounding_server.py
├── docker-compose.yml
├── pyproject.toml
├── .github
└── workflows
│ ├── python-publish.yml
│ ├── docker-testing.yml
│ ├── python-package.yml
│ └── docker-publish.yml
├── setup.py
├── LICENSE
├── README.md
├── utils
├── gost-install.sh
├── linux_chrome_deb_repo_installer.sh
└── linux_chrome_archive_installer.py
└── Dockerfile
/docker/requirements.txt:
--------------------------------------------------------------------------------
1 | mitmproxy==10.4.2
2 |
--------------------------------------------------------------------------------
/tests/empty_test.py:
--------------------------------------------------------------------------------
def test_empty():
  """Placeholder test: keeps the test suite non-empty and always passes."""
  return None
3 |
--------------------------------------------------------------------------------
/docker/rootfs/opt/yandex_captcha_puzzle_solver/etc/html_templates/form.html.j2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {{smart_token}}
5 |
6 |
7 |
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib.metadata
2 |
3 | from .yandex_captcha_puzzle_solver import Request, Response, Solver, BrowserWrapper
4 | from .proxy_controller import ProxyController
5 | from .yandex_captcha_puzzle_solve_server import server, server_run
6 |
7 | __version__ = importlib.metadata.version(__package__ or __name__)
8 |
9 | __all__ = [
10 | 'Request', 'Response', 'Solver', 'BrowserWrapper',
11 | 'ProxyController', 'server', 'server_run'
12 | ]
13 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | yandex-captcha-puzzle-solver :
3 | build:
4 | args:
5 | #CHROME_VERSION: '131.'
6 | UID: 1001
7 | # CHROME_DISABLE_GPU: true
8 | # CHECK_SYSTEM: true
9 | # PYTHON_VERSION: 3.11
10 | context: .
11 | dockerfile: Dockerfile
12 | environment:
13 | UNUSED: false
14 | # DEBUG: true
15 | VERBOSE: true
16 | container_name: yandex-captcha-puzzle-solver
17 | image: yandex-captcha-puzzle-solver:latest
18 | ports:
19 | - 20081:8080
20 | #volumes:
21 | #- ./var:/opt/yandex_captcha_puzzle_solver/var/
22 | #< Uncomment if you want persistent logs between runs
23 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | 'setuptools>=45.0',
4 | 'setuptools-scm'
5 | ]
6 | build-backend = "setuptools.build_meta"
7 |
8 | [project]
9 | name = "yandex_captcha_puzzle_solver"
10 | version = "0.1.2"
11 | authors = [
12 | {name = "yoori", email = "yuri.kuznecov@gmail.com"}
13 | ]
14 |
15 | description = ""
16 | readme = "README.md"
17 | license = {text = 'GNU Lesser General Public License'}
18 | requires-python = ">=3.9"
19 | dynamic = ["dependencies"]
20 |
21 | [project.urls]
22 | homepage = "https://github.com/yoori/flare-bypasser"
23 |
24 | [options]
25 | package_dir = "src/yandex_captcha_puzzle_solver"
26 |
27 | [project.scripts]
28 | yandex_captcha_puzzle_solve_server = "yandex_captcha_puzzle_solver:server_run"
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Set up Python
18 | uses: actions/setup-python@v3
19 | with:
20 | python-version: '3.x'
21 | - name: Set up cmake and ninja
22 | uses: lukka/get-cmake@latest
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install build
27 | - name: Build package
28 | run: python -m build
29 | - name: Publish package
30 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
31 | with:
32 | user: __token__
33 | password: ${{ secrets.PYPI_API_TOKEN }}
34 |
--------------------------------------------------------------------------------
/docker/rootfs/opt/yandex_captcha_puzzle_solver/etc/html_templates/index.html.j2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import importlib
4 | import distutils.core
5 |
6 |
# Trick to avoid reinstalling packages that were installed outside of pip (apt)
# and are made available through ADDITIONAL_PYTHONPATH.
def is_installed(pkgname):
  """Return True when the module named *pkgname* can be imported, False otherwise.

  Any exception raised while importing counts as "not installed".
  """
  try:
    module = importlib.import_module(pkgname)
  except Exception:
    return False
  return module is not None
15 |
16 |
# Directories listed in ADDITIONAL_PYTHONPATH (colon separated) are appended to sys.path,
# so packages installed outside of pip are visible to the is_installed() checks below.
if "ADDITIONAL_PYTHONPATH" in os.environ:
  add_path = os.environ["ADDITIONAL_PYTHONPATH"]
  # Skip empty entries: an empty string in sys.path stands for the current directory.
  sys.path += [entry for entry in add_path.split(':') if entry]

# NOTE: asyncio, uuid and argparse are standard library modules on every Python version
# this project supports (requires-python >= 3.9), so they are not requested from PyPI.
install_requires = [
  'urllib3',
  'websockets==14.0',
  'zendriver_flare_bypasser==0.2.4',
  'oslex',
  'jinja2',

  # Server dependencies
  'fastapi',
  'uvicorn',

  'xvfbwrapper==0.2.9 ; platform_system != "Windows"',
  'gunicorn ; platform_system != "Windows"',
]

# numpy and opencv are requested only when they are not importable already.
for package_import_name, package in [('numpy', 'numpy'), ('cv2', 'opencv-python')]:
  if not is_installed(package_import_name):
    install_requires += [package]

# install_requires is a setuptools feature and distutils is deprecated (PEP 632),
# so setup() is taken from setuptools, the build backend declared in pyproject.toml.
import setuptools  # noqa: E402

setuptools.setup(install_requires=install_requires)
44 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Yuri Kuznecov (yoori / yuri.kuznecov@gmail.com)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/docker/rootfs/opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Starts the local proxy chain and waits for all of its processes:
#   gost           listens on LOCAL_PORT + 2000 (forwards to PROXY when it is given),
#   GroundingProxy listens on LOCAL_PORT + 1000 (mitmdump with mitm_grounding_addon.py),
#   SplitProxy     listens on LOCAL_PORT        (mitmdump with mitm_split_addon.py).
#
# Usage: YandexCaptchaPuzzleSolverProxyRun.sh LOCAL_PORT GROUND_URL [PROXY] [LOG_DIR]

LOCAL_PORT="$1"
# NOTE(review): GROUND_URL is accepted but not used below; ground_url is hard-coded
# to http://localhost:9001 - confirm whether it should be passed to mitmdump instead.
GROUND_URL="$2"
PROXY="$3"
LOG_DIR="$4" # Log dir.

# PIDs of the background processes started by this script.
pids=()

clean_up() {
  rm -rf "$1"
}

# Without an explicit log directory the logs go to a temporary directory
# that is removed when the script exits.
if [ "$LOG_DIR" = "" ] ; then
  LOG_DIR=$(mktemp -d -t proxy.XXXXXX)
  trap "clean_up '$LOG_DIR'" EXIT
fi

# The grounding server is expected to be running on port 9001.

if [ "$PROXY" != "" ] ; then
  # Quote the proxy specification so that it is passed to gost as a single argument.
  gost -L="http://127.0.0.1:$((LOCAL_PORT + 2000))" -F="$PROXY" &
  pids+=($!)
else
  gost -L="http://127.0.0.1:$((LOCAL_PORT + 2000))" &
  pids+=($!)
fi

# GroundingProxy: proxy that converts proxied traffic to http and sends it to GroundingServer.
mitmdump --mode regular --listen-port "$((LOCAL_PORT + 1000))" \
  -s /opt/yandex_captcha_puzzle_solver/lib/mitm_addons/mitm_grounding_addon.py \
  --set ground_url=http://localhost:9001 \
  >"$LOG_DIR/mitmproxy_closing.log" 2>&1 &

pids+=($!)

# SplitProxy: proxy that splits traffic:
#   urls with the solver_intercept argument go to GroundingProxy,
#   all other urls go to the external network.
mitmdump --listen-port "$LOCAL_PORT" --ssl-insecure \
  -s /opt/yandex_captcha_puzzle_solver/lib/mitm_addons/mitm_split_addon.py \
  --mode "upstream:http://localhost:$((LOCAL_PORT + 1000))" \
  --set proxy="localhost:$((LOCAL_PORT + 2000))" \
  >"$LOG_DIR/mitmproxy_splitter.log" 2>&1 &

pids+=($!)

# Block until every started process has finished.
for pid in "${pids[@]}"; do
  wait "${pid}"
done
49 |
--------------------------------------------------------------------------------
/.github/workflows/docker-testing.yml:
--------------------------------------------------------------------------------
1 | name: Docker
2 |
3 | # This workflow uses actions that are not certified by GitHub.
4 | # They are provided by a third-party and are governed by
5 | # separate terms of service, privacy policy, and support
6 | # documentation.
7 |
8 | on:
9 | workflow_run:
10 | workflows: ["Upload Python Package"]
11 | types:
12 | - completed
13 |
14 | env:
15 | # Use docker.io for Docker Hub if empty
16 | REGISTRY: ghcr.io
17 | # github.repository as /
18 | IMAGE_NAME: ${{ github.repository }}
19 |
20 |
21 | jobs:
22 | build:
23 | strategy:
24 | matrix:
25 | include:
26 | - builder: ubuntu-latest
27 | platform: linux/amd64
28 | - builder: ubuntu-latest
29 | platform: linux/arm64
30 | - builder: ubuntu-latest
31 | platform: linux/arm/v7
32 |
33 | runs-on: ${{ matrix.builder }}
34 |
35 | permissions:
36 | contents: read
37 | packages: write
38 | # This is used to complete the identity challenge
39 | # with sigstore/fulcio when running outside of PRs.
40 | id-token: write
41 |
42 | steps:
43 | - name: Checkout repository
44 | uses: actions/checkout@v4
45 |
46 | - name: Checkout latest docker image
47 | if: ${{ github.event_name != 'pull_request' }}
48 | env:
49 | # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
50 | TAGS: ${{ steps.meta.outputs.tags }}
51 | DIGEST: ${{ steps.build-and-push.outputs.digest }}
52 | # This step uses the identity token to provision an ephemeral certificate
53 | # against the sigstore community Fulcio instance.
54 | run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # YandexCaptchaPuzzleSolver
3 |
4 | YandexCaptchaPuzzleSolver is a service to bypass Yandex Captcha (Puzzle).
5 |
6 | ## How it works
7 |
8 | YandexCaptchaPuzzleSolver starts a server that can solve Yandex captchas of the puzzle type:
9 |
10 | 
11 |
12 |
13 | and it waits for user requests.
14 | To get a valid token for a site (the result of solving its captcha), send a request to the running Docker container (see Installation):
15 |
16 | curl -XPOST 'http://localhost:20081/get_token' \
17 | -H 'Content-Type: application/json' \
18 | --data-raw '{"maxTimeout": 120000, "url": "SITE FOR SOLVE", "yandex_key": "YANDEX KEY"}'
19 |
20 | You can find YANDEX KEY in the source code of the target page; it usually starts with the string **ysc1_**.
21 |
22 | Response example:
23 |
24 | {"status":"ok","message":"Challenge solved!","startTimestamp":1733819749.824522,"endTimestamp":1733819774.119855,"solution":{"status":"ok","url":"","cookies":[{"name":"receive-cookie-deprecation","value":"1","domain":".yandex.ru","path":"/","secure":true},{"name":"session-cookie","value":"180fc3e2fb41df94e50241d9d00b084574552116189d7515109f2424d43b405a76cd9ae4255944b2d868fe358dc27d53","domain":".some.domain","path":"/","secure":false}],"user_agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36","token":"dD0xNzMzODE5NzY3O2k9MjE3LjY1LjIuMjI5O0Q9NzAzQzI4OTlFRDBFQTBFRTM1ODE3MUFBMzRFMkFDRURDQkQzQTlFMDgwMzM4QjMzRDJEODlDMTczMTEyQTk5ODZDODkyMEQxNzA4QTBFN0I4MTkxQzVCRkQ3RjRDMzExQ0E3Qjg1NkRDOEM4MDZENTFEM0JERENFODUzNzlEMTYzODY2MkM5RDg2RjIwQUEwNzc7dT0xNzMzODE5NzY3NTk4OTEyNjU3O2g9ZjI3ZWY0OWUxZmUyN2EzNWQ4OTNmM2IzYzM5YTQwNWU="}}
25 |
26 | ## Installation
27 |
28 | It is recommended to install using a Docker container because the project depends on an external browser that is
29 | already included within the image.
30 |
31 | We provide a `docker-compose.yml` configuration file.
32 | Clone this repository and execute `docker compose up -d` to start the container.
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/src/mitm_addons/mitm_grounding_addon.py:
--------------------------------------------------------------------------------
1 | import typing
2 | import traceback
3 | import urllib.parse
4 | import mitmproxy
5 |
6 |
class Addon(object):
  """
  mitmproxy addon that points upstream connections to the grounding server.

  The grounding server address is taken from the ``ground_url`` option
  (registered in ``load`` and parsed in ``configure``).
  """

  # (host, port) of the grounding server; None until a usable ground_url is configured.
  _ground_url: typing.Optional[typing.Tuple[str, int]]

  def __init__(self, template_root = "mtproxy_templates/"):
    # template_root is not used by this addon; the parameter is kept so that
    # callers passing it keep working.
    self._ground_url = None

  def load(self, loader):
    """Register the ``ground_url`` option."""
    loader.add_option(
      name = "ground_url",
      typespec = typing.Optional[str],
      default = None,
      help = "Ground url",
    )

  def configure(self, updates):
    """
    Parse the ``ground_url`` option when it is updated.

    The address is stored only when the url has both a host and an explicit port;
    otherwise the previous value is kept. Errors are printed, not raised.
    """
    try:
      if "ground_url" in updates:
        ground_url = urllib.parse.urlparse(mitmproxy.ctx.options.ground_url)
        if ground_url.hostname is not None and ground_url.port is not None:
          self._ground_url = (ground_url.hostname, ground_url.port)

    except Exception as e:
      print("configure, exception: " + str(e), flush = True)

  def running(self):
    # We change the connection strategy to lazy so that next_layer happens before we actually connect upstream.
    # Alternatively we could also change the server address in `server_connect`.
    mitmproxy.ctx.options.connection_strategy = "lazy"
    mitmproxy.ctx.options.upstream_cert = False

  def next_layer(self, nextlayer: mitmproxy.proxy.layer.NextLayer):
    """
    remove TLS for ground_url requests
    """
    try:
      print("next_layer: " + str(nextlayer.context.server.address), flush = True)
      if (
        nextlayer.context.server.address is not None and
        self._ground_url is not None
      ):
        # Retarget the connection to the grounding server and install a
        # ClientTLSLayer whose child is a plain TCPLayer.
        nextlayer.context.server.address = self._ground_url
        nextlayer.context.client.alpn = b""
        nextlayer.layer = mitmproxy.proxy.layers.ClientTLSLayer(nextlayer.context)
        nextlayer.layer.child_layer = mitmproxy.proxy.layers.TCPLayer(nextlayer.context)
    except Exception as e:
      print("next_layer, exception: " + str(e), flush = True)
      traceback.print_exc()

  def server_connect(self, data: mitmproxy.proxy.server_hooks.ServerConnectionHookData):
    """Override the server address for non TLS connections."""
    # Same guard as in next_layer: without a configured grounding server the
    # original address is kept instead of being replaced by None.
    if self._ground_url is not None:
      data.server.address = self._ground_url
58 |
59 |
60 | addons = [
61 | Addon()
62 | ]
63 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ "main" ]
9 | pull_request:
10 | branches: [ "main" ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | python-version: ["3.9", "3.10", "3.11"]
20 |
21 | steps:
22 | - uses: actions/checkout@v4
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v3
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Set up cmake and ninja
28 | uses: lukka/get-cmake@latest
29 | # Install opencv as package
30 | #- name: Trust ubuntu GPG keys
31 | # run: |
32 | # sudo gpg --keyserver "hkps://keyserver.ubuntu.com:443" --recv-keys 40976EAF437D05B5 3B4FE6ACC0B21F32
33 | # sudo gpg --yes --output "/etc/apt/trusted.gpg.d/40976EAF437D05B5.gpg" --export "40976EAF437D05B5"
34 | # sudo gpg --yes --output "/etc/apt/trusted.gpg.d/3B4FE6ACC0B21F32.gpg" --export "3B4FE6ACC0B21F32"
35 | # sudo add-apt-repository deb http://security.ubuntu.com/ubuntu xenial-security main
36 | # sudo apt-get update
37 | # sudo apt-get install libopencv-dev python3-opencv
38 | #- uses: Dovyski/setup-opencv-action@v1.1
39 | # with:
40 | # opencv-version: '4.0.0'
41 | - name: Install dependencies
42 | run: |
43 | python -m pip install --upgrade pip
44 | python -m pip install flake8 pytest
45 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
46 | - name: Lint with flake8
47 | run: |
48 | # stop the build if there are Python syntax errors or undefined names
49 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --indent-size 2 --max-line-length=100 --ignore E251,W504,C901
50 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
51 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --indent-size 2 --ignore E251,W504,C901
52 | - name: Preinstall package problematic dependencies
53 | run: |
54 | python -m pip install opencv-contrib-python numpy
55 | - name: Install package
56 | run: |
57 | python -m pip install --prefer-binary .
58 | - name: Test with pytest
59 | run: |
60 | pytest
61 |
--------------------------------------------------------------------------------
/src/grounding_server/grounding_server.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import jinja2
4 | import flask
5 |
6 | app = flask.Flask(__name__, template_folder = "templates/")
7 |
8 | form_page_template = None
9 | page_template = None
10 | template_root = ''
11 |
12 |
@app.route('/shutdown', methods=["GET", "POST"])
def request_shutdown():
  """
  Ask the server to stop.

  Responds with 204 when the shutdown callable found in the WSGI environment
  ran without an error and with 500 on any failure.
  """
  status = 204
  try:
    print("Flask shutdown request got ...", flush = True)
    # NOTE(review): 'werkzeug.server.shutdown' is presumably provided only by the
    # Werkzeug development server (and may be missing in recent releases); when it
    # is missing, calling None fails and the handler answers 500 - verify.
    stop_server = flask.request.environ.get('werkzeug.server.shutdown')
    stop_server()
    print("Flask shutdown request processed ...", flush = True)
  except Exception:
    status = 500
  return flask.Response(status = status)
24 |
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def request_main(path):
  """
  Render the captcha page for the root url and for any other path.

  The ``yandex_captcha_key`` query argument is passed to the page template
  (as None when the argument is absent). ``path`` itself is not used.
  """
  # The second rule has to be the catch-all '/<path:path>': a second plain '/' rule
  # would call this function without the required ``path`` argument.
  yandex_captcha_key = flask.request.args.get("yandex_captcha_key")
  args = {'yandex_captcha_key': yandex_captcha_key}
  resp = page_template.render(args)
  return flask.Response(resp, mimetype = 'text/html')
37 |
@app.route('/send_captcha')
@app.route('/send_captcha/')
def request_send_captcha():
  """
  Render the form page.

  The ``smart-token`` query argument is passed to the form template as
  ``smart_token`` (None when the argument is absent).
  """
  template_args = {'smart_token': flask.request.args.get("smart-token")}
  html = form_page_template.render(template_args)
  return flask.Response(html, mimetype = 'text/html')
50 |
def run_app(args):
  """Run the Flask application on all interfaces, using the port from args['port']."""
  listen_port = args['port']
  app.run(host = "0.0.0.0", port = listen_port, threaded = True)
53 |
54 |
def start_app():
  """
  Command line entry point of the grounding server.

  Parses the arguments, writes the process id to the pid file, loads both
  jinja2 templates into the module level variables and runs the application.
  """
  global page_template, form_page_template

  cli = argparse.ArgumentParser(description = 'grounding_server.')
  cli.add_argument(
    "-p", "--port", type = int, default = 9200, help="Listen port")
  cli.add_argument(
    "-f", "--pidfile", "--pid-file", type = str, default = 'grounding_server.pid', help="Pid file")
  cli.add_argument(
    "-t", "--page-template", type = str, default = 'index.html.j2', help = "Template file")
  cli.add_argument(
    "--form-page-template", type = str, default = 'form.html.j2', help = "Template file")
  options = cli.parse_args()

  # The pid file is written before the templates are loaded.
  with open(options.pidfile, 'wb') as pid_file:
    pid_file.write(str(os.getpid()).encode('utf-8'))

  # Template names are resolved by a loader rooted at "/".
  page_env = jinja2.Environment(loader = jinja2.FileSystemLoader("/"))
  page_template = page_env.get_template(options.page_template)

  form_env = jinja2.Environment(loader = jinja2.FileSystemLoader("/"))
  form_page_template = form_env.get_template(options.form_page_template)

  run_app({'port': options.port, 'ssl': False})
79 |
80 |
81 | if __name__ == "__main__":
82 | start_app()
83 |
--------------------------------------------------------------------------------
/src/mitm_addons/mitm_split_addon.py:
--------------------------------------------------------------------------------
1 | import typing
2 | import mitmproxy
3 | from mitmproxy.script import concurrent
4 |
5 |
class Addon(object):
  """
  mitmproxy addon that chooses, per request, the proxy used for the server connection.

  Requests without a ``solver_intercept`` query argument are switched to the proxy
  given by the ``proxy`` option; requests with that argument are switched back to
  the upstream taken from the ``upstream:`` mode (see ``running``).
  """

  # Proxy spec parsed from the ``proxy`` option; used for requests WITHOUT solver_intercept.
  _no_condition_via = None
  # (scheme, address) of the upstream mode; used for requests WITH solver_intercept.
  _proxy_via = None

  def __init__(self):
    pass

  def load(self, loader):
    """Register the ``proxy`` option."""
    loader.add_option(
      name="proxy",
      typespec=typing.Optional[str],
      default=None,
      help="proxy",
    )

  def configure(self, updates):
    """Re-parse the ``proxy`` option when it is updated; errors are printed, not raised."""
    try:
      if "proxy" in updates:
        self._set_proxy(mitmproxy.ctx.options.proxy)
    except Exception as e:
      print("configure, exception: " + str(e), flush=True)

  def running(self):
    # We change the connection strategy to lazy so that next_layer happens before we actually connect upstream.
    # Alternatively we could also change the server address in `server_connect`.
    mitmproxy.ctx.options.connection_strategy = "lazy"
    mitmproxy.ctx.options.upstream_cert = False
    # fill default upstream (for url's subject to the condition)
    self._proxy_via = None  # set via to None for non upstream modes
    options = mitmproxy.ctx.options
    # Only the first configured mode is inspected.
    if options.mode and options.mode[0].startswith("upstream:"):
      mode = mitmproxy.proxy.mode_specs.UpstreamMode.parse(options.mode[0])
      self._proxy_via = (mode.scheme, mode.address)

  @concurrent
  def requestheaders(self, flow):
    """Select the ``via`` proxy of the flow's server connection from the request url."""
    # print("REQUEST URL: " + flow.request.pretty_url, flush=True)
    need_send_to_proxy = self._need_send_to_proxy(flow.request)

    # flow.server_conn.via is None: means that will be used upstream(internal proxy)
    if (need_send_to_proxy and (
      flow.server_conn.via is None or flow.server_conn.via != self._no_condition_via)
    ):
      # switch to use external proxy
      # The connection is marked as closed before ``via`` is replaced
      # (presumably so that it is re-established through the new proxy - TODO confirm).
      flow.server_conn.state = mitmproxy.connection.ConnectionState.CLOSED
      flow.server_conn.via = self._no_condition_via
    elif (not need_send_to_proxy and (
      flow.server_conn.via is not None and flow.server_conn.via == self._no_condition_via)
    ):
      # switch from use proxy to upstream
      flow.server_conn.state = mitmproxy.connection.ConnectionState.CLOSED
      flow.server_conn.via = self._proxy_via

    print("SEND URL: " + flow.request.pretty_url + " => " + str(flow.server_conn.via), flush=True)

  def _need_send_to_proxy(self, request):
    """Return True when the request has no ``solver_intercept`` query argument."""
    args = request.query  # args: MultiDictView
    return ("solver_intercept" not in args)

  def _set_proxy(self, parse_proxy):
    """Parse the proxy spec (default scheme "http"); an empty value resets it to None."""
    proxy_spec = mitmproxy.net.server_spec.parse(parse_proxy, "http") if parse_proxy else None
    self._no_condition_via = proxy_spec
    # NOTE(review): _via is assigned here but not read anywhere in this class.
    self._via = proxy_spec
69 |
70 |
71 | addons = [
72 | Addon()
73 | ]
74 |
--------------------------------------------------------------------------------
/utils/gost-install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Check Root User
4 |
# If you want to run as another user, please modify $EUID to be owned by this user
if (( EUID != 0 )); then
  printf '%s\n' "$(tput setaf 1)Error: You must run this script as root!$(tput sgr0)"
  exit 1
fi

# GitHub repository whose releases are queried and downloaded
repo="go-gost/gost"
base_url="https://api.github.com/repos/${repo}/releases"
14 |
15 | # Function to download and install gost
# Downloads the release asset of the given gost version that matches the current
# operating system and CPU architecture and installs the binary to /usr/local/bin/gost.
#   $1 - release tag to install
# Exits the script with status 1 on an unsupported platform or a failed installation.
install_gost() {
  version=$1
  # Detect the operating system
  if [[ "$(uname)" == "Linux" ]]; then
    os="linux"
  elif [[ "$(uname)" == "Darwin" ]]; then
    os="darwin"
  elif [[ "$(uname)" == "MINGW"* ]]; then
    os="windows"
  else
    echo "Unsupported operating system."
    exit 1
  fi

  # Detect the CPU architecture
  arch=$(uname -m)
  case $arch in
    x86_64)
      cpu_arch="amd64"
      ;;
    armv5*)
      cpu_arch="armv5"
      ;;
    armv6*)
      cpu_arch="armv6"
      ;;
    armv7*)
      cpu_arch="armv7"
      ;;
    aarch64)
      cpu_arch="arm64"
      ;;
    i686)
      cpu_arch="386"
      ;;
    mips64*)
      cpu_arch="mips64"
      ;;
    # mipsel* has to be tested before mips*, otherwise it is never reached.
    mipsel*)
      cpu_arch="mipsle"
      ;;
    mips*)
      cpu_arch="mips"
      ;;
    *)
      echo "Unsupported CPU architecture."
      exit 1
      ;;
  esac
  get_download_url="$base_url/tags/$version"
  # First asset whose download url mentions both the OS and the CPU architecture.
  download_url=$(curl -s "$get_download_url" | grep -Eo "\"browser_download_url\": \".*${os}.*${cpu_arch}.*\"" | awk -F'["]' '{print $4}' | head -n1)

  if [[ -z "$download_url" ]]; then
    echo "Can't find gost download url for version '$version' (${os}/${cpu_arch})" >&2
    exit 1
  fi

  # Download the binary
  echo "Downloading gost version $version... (by $download_url)"
  curl -fsSL -o gost.tar.gz "$download_url"

  # Extract and install the binary
  echo "Installing gost..."
  tar -xzf gost.tar.gz
  chmod +x gost
  # A { } group is used instead of ( ): exit inside a subshell would not stop the script.
  mv gost /usr/local/bin/gost || { echo "gost not found after install" >&2 ; exit 1 ; }

  echo "gost installation completed!"
}
80 |
81 | # Retrieve available versions from GitHub API
# Release tags as returned by the GitHub API, one per line.
versions=$(curl -s "$base_url" | grep -oP 'tag_name": "\K[^"]+')

# Without any version there is nothing to install or to choose from.
if [[ -z "$versions" ]]; then
  echo "Can't retrieve gost versions from $base_url" >&2
  exit 1
fi

# Check if --install option provided
if [[ "$1" == "--install" ]]; then
  # Install the first listed version automatically
  latest_version=$(echo "$versions" | head -n 1)
  install_gost "$latest_version"
else
  # Display available versions to the user
  echo "Available gost versions:"
  select version in $versions; do
    if [[ -n $version ]]; then
      install_gost "$version"
      break
    else
      echo "Invalid choice! Please select a valid option."
    fi
  done
fi
101 |
--------------------------------------------------------------------------------
/utils/linux_chrome_deb_repo_installer.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Downloads a chrome/chromium deb package of the requested version (and its
# chromium-common dependency, if any) and unpacks it into INSTALL_ROOT.
# Should be run as root.
#
# Usage: linux_chrome_deb_repo_installer.sh INSTALL_ROOT CHROME_VERSION
#   CHROME_VERSION is matched as a prefix of the package version.

INSTALL_ROOT="$1"
CHROME_VERSION="$2"

mkdir -p "$INSTALL_ROOT"

curl "https://dl.google.com/linux/linux_signing_key.pub" 2>/dev/null | tee /etc/apt/trusted.gpg.d/google.asc >/dev/null

touch /etc/apt/sources.list.d/chrome-find-repos.list
echo 'deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main' >>/etc/apt/sources.list.d/chrome-find-repos.list

apt update -y --no-install-recommends >/dev/null 2>&1

apt list --all-versions 2>/dev/null | grep -E '^(google-chrome-|chromium/)' | \
  tr '\t' ' ' >/tmp/available_all_chrome_versions

# Machine name reported by `arch` together with the matching apt architecture name.
ARCH_SYNONYMS="$(arch)"
if [ "$ARCH_SYNONYMS" = "aarch64" ] || [ "$ARCH_SYNONYMS" = "arm64" ] ; then
  ARCH_SYNONYMS="aarch64|arm64"
elif [ "$ARCH_SYNONYMS" = "armv7l" ] || [ "$ARCH_SYNONYMS" = "armhf" ] ; then
  ARCH_SYNONYMS="armv7l|armhf"
elif [ "$ARCH_SYNONYMS" = "x86_64" ] || [ "$ARCH_SYNONYMS" = "amd64" ] ; then
  ARCH_SYNONYMS="x86_64|amd64"
fi

# Keep only the packages whose architecture column is one of the synonyms.
# The alternatives are grouped so that ^ and $ apply to each of them.
awk -F' ' '{if($3 ~ /^('"$ARCH_SYNONYMS"')$/){print $0}}' /tmp/available_all_chrome_versions \
  >/tmp/available_platform_chrome_versions

# First package whose version starts with CHROME_VERSION, as "name=version".
# Every dot of the version is escaped so that it matches a literal dot only.
FOUND_VERSION=$(cat /tmp/available_platform_chrome_versions |
  awk '{ if ($2 ~ /^'"$(echo "$CHROME_VERSION" | sed 's/[.]/\\\./g')"'/) {print $1" "$2} }' |
  sed -r 's|(^[^ ]+)/[^ ]+ (.*)$|\1 \2|' | head -n1 | tr ' ' '=')

if [ "$FOUND_VERSION" = "" ] ; then
  echo "Can't find chrome of required version: $CHROME_VERSION , all available versions (for all platforms):" >&2
  cat /tmp/available_all_chrome_versions >&2
  echo "Version available for your platform ($(arch)):" >&2
  cat /tmp/available_platform_chrome_versions >&2
  exit 1
fi

echo "To install package: $FOUND_VERSION"

apt remove -y "$(echo "$FOUND_VERSION" | awk -F= '{print $1}')" >/dev/null 2>&1

rm -rf /tmp/chrome_download >/dev/null
mkdir /tmp/chrome_download
pushd /tmp/chrome_download >/dev/null 2>&1

apt download "$FOUND_VERSION" >/tmp/chrome_install.err 2>&1 || (
  echo "Chrome install failed:" >&2 ; cat /tmp/chrome_install.err >&2 ;
  echo "Available versions: " >&2 ; cat /tmp/available_platform_chrome_versions >&2 ;
  exit 1 ;
) || exit 1

# Dependencies of the downloaded package(s) that start with chromium-common.
CHROME_DEPS=$(find . -type f -exec apt-cache depends {} \; | \
  sed -r 's/^<(.*)>$/\1/' | sort -u | grep -E '^chromium-common')

if [ "$CHROME_DEPS" != "" ] ; then
  # The dependencies are requested with the same version as the main package.
  DEP_VERSION=$(echo "$FOUND_VERSION" | awk -F'=' '{print $2}')
  if [ "$DEP_VERSION" != "" ] ; then
    DEP_VERSION="=$DEP_VERSION"
  fi
  INSTALL_CHROME_DEPS=$(echo "$CHROME_DEPS" | tr ' ' '\n' | grep -v -E '^$' | sed -r 's/$/'"$DEP_VERSION"'/' | tr '\n' ' ')
  echo "To install package deps: $INSTALL_CHROME_DEPS"
  apt download $INSTALL_CHROME_DEPS >>/tmp/chrome_install.err 2>&1 || (
    echo "Chrome deps install failed:" >&2 ; cat /tmp/chrome_install.err >&2 ;
    echo "Available versions: " >&2 ; cat /tmp/available_platform_chrome_versions >&2 ;
    exit 1 ;
  ) || exit 1
fi

# Unpack every downloaded package into the install root.
find . -type f -exec dpkg-deb -R {} "$INSTALL_ROOT" \;

popd
77 |
--------------------------------------------------------------------------------
/utils/linux_chrome_archive_installer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import shutil
4 | import logging
5 | import json
6 | import zipfile
7 | import argparse
8 | from urllib.request import urlretrieve, urlopen
9 |
10 |
def fetch_package(download_url):
  """Retrieve *download_url* with urlretrieve and return the local file path."""
  local_path, _headers = urlretrieve(download_url)
  return local_path
13 |
14 |
def unzip_package(
  fp, extract_root='/', unzip_path='/tmp/unzip_chrome',
  extract_sub_directory=''
):
  """
  Extract a zip archive and copy one of its sub directories into *extract_root*.

  fp: path (or file object) of the zip archive.
  extract_root: directory that receives the files (created when missing,
    existing files with the same name are overwritten).
  unzip_path: temporary directory used for extraction; whatever is left there
    from an earlier run is removed first and the directory is removed at the end.
  extract_sub_directory: directory inside the archive whose content is copied.
  """
  # Remove leftovers of a previous run. os.unlink() cannot remove a directory,
  # so a stale directory is removed with rmtree; otherwise its old files would
  # be copied into extract_root together with the new ones.
  if os.path.isdir(unzip_path) and not os.path.islink(unzip_path):
    shutil.rmtree(unzip_path, ignore_errors=True)
  else:
    try:
      os.unlink(unzip_path)
    except OSError:
      # Best effort, as before: a missing path is the normal case.
      pass

  os.makedirs(unzip_path, mode=0o755, exist_ok=True)

  try:
    with zipfile.ZipFile(fp, mode="r") as zf:
      zf.extractall(unzip_path)

    shutil.copytree(
      os.path.join(unzip_path, extract_sub_directory), extract_root,
      dirs_exist_ok=True)
  finally:
    # The temporary directory is removed even when extraction or copying fails.
    shutil.rmtree(unzip_path, ignore_errors=True)
33 |
34 |
def download_and_install(version_prefix = None, install_root = None, arch = 'x86_64'):
  """
  Download a "Chrome for Testing" archive and install it.

  version_prefix: required prefix of the version string. With None or '' the
    last listed version that has a download for the platform is used; otherwise
    the first listed version that starts with the prefix.
  install_root: directory that receives the files ('/usr/bin/' when None).
  arch: CPU architecture, only 'x86_64' is supported.

  Returns True. Raises Exception for an unsupported architecture or when no
  download url is found.
  """
  import subprocess  # local import: needed only for the sed call below

  # Script can install chrome only on linux platforms and only on x86_64.
  # here no archive of versions for linux/arm64
  if arch == 'x86_64':
    target_platform = "linux64"
  else:
    raise Exception("Unknown or unsupported platform: " + str(arch))

  with urlopen(
    "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json"
  ) as conn:
    response_json = json.loads(conn.read().decode())

  # If version is undefined: use max_version
  if version_prefix == '':
    version_prefix = None

  chrome_download_url = None
  for version_obj in response_json['versions']:
    if ('version' in version_obj and 'downloads' in version_obj and (
        version_prefix is None or version_obj['version'].startswith(version_prefix))):
      downloads_obj = version_obj['downloads']
      if 'chrome' in downloads_obj:
        local_chrome_download_url = None

        for platform_obj in downloads_obj['chrome']:
          if platform_obj['platform'] == target_platform:
            local_chrome_download_url = platform_obj['url']

        if local_chrome_download_url is not None:
          chrome_download_url = local_chrome_download_url
          # With an explicit prefix the first match wins; without it the loop
          # goes on, so the last listed version is the one that remains.
          if version_prefix is not None:
            break

  if chrome_download_url is None:
    raise Exception("Can't find download urls")

  print("Download chrome by url: " + str(chrome_download_url), flush=True)
  extract_root = install_root if install_root is not None else '/usr/bin/'
  unzip_package(
    fetch_package(chrome_download_url), extract_root=extract_root,
    extract_sub_directory=('chrome-' + target_platform))

  for executable in ('chrome', 'chrome-wrapper', 'chrome_crashpad_handler', 'chrome_sandbox'):
    os.chmod(os.path.join(extract_root, executable), 0o755)

  # Patch the product name inside the binary: the space before "for Testing" is
  # replaced in place (sed receives the \x00 escape as is). The command is passed
  # as an argument list, so an install root with spaces or shell metacharacters
  # is handled correctly. As before, the exit status of sed is not checked.
  subprocess.run(
    [
      "sed", "-i",
      "s/Google Chrome for Testing/Google Chrome\\x00for Testing/",
      os.path.join(extract_root, 'chrome'),
    ],
    check=False)
  return True
88 |
89 |
if __name__ == "__main__":
    # Command line entry point: parse the options, run the installer and
    # turn any failure into a logged error with exit code 1.
    cli = argparse.ArgumentParser(description='linux_chrome_installer.')
    cli.add_argument("-v", "--version-prefix", type=str, default='120.')
    cli.add_argument("-i", "--install-root", type=str, default='/usr/bin')
    cli.add_argument("--arch", type=str, default='x86_64')
    options = cli.parse_args()

    try:
        download_and_install(
            version_prefix=options.version_prefix,
            install_root=options.install_root,
            arch=options.arch,
        )
    except Exception as install_error:
        logging.error("Can't install chrome: " + str(install_error))
        sys.exit(1)
106 |
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/proxy_controller.py:
--------------------------------------------------------------------------------
1 | import typing
2 | import threading
3 | import subprocess
4 | import socket
5 | import logging
6 | import contextlib
7 | import oslex
8 | import jinja2
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
class ProxyController(object):
    """Starts local proxy processes on demand and shares them per upstream url.

    ``get_proxy(url)`` returns a reference-counted handle.  All handles for
    the same url share one proxy process, which is killed when the last
    handle is released.

    Locking: ``_lock`` guards both registries and is always taken before a
    holder's own lock.  Handles are created and released under ``_lock``, so
    a proxy can not be closed while another thread is acquiring it.
    """
    _proxy_cmd_template: "jinja2.Template"
    _lock: threading.Lock
    _proxies_by_url: typing.Dict[str, object]  # -> ProxyHolder
    _proxies_by_port: typing.Dict[int, object]  # -> ProxyHolder

    class PortBusy(Exception):
        """The chosen port is already listened on by something else."""
        pass

    class NoPortForListen(Exception):
        """Every port of the configured interval is used by this controller."""
        pass

    class ProxyHolder(object):
        """State of one local proxy: port, upstream url, process, ref count."""
        _proxy_storage: object  # ProxyController
        _local_port: int
        _url: str
        _ref_count: int = 0
        _start_wait: threading.Lock
        _started: bool = False
        _process = None

        def __init__(self, proxy_storage: object, local_port: int, url: str):
            self._proxy_storage = proxy_storage
            self._start_wait = threading.Lock()
            self._local_port = local_port
            self._url = url

        def add_ref(self):
            """Take a reference, starting the proxy process on first use.

            If the start fails the exception propagates and no reference is
            taken.
            """
            # Wait for a start that is in progress in another thread.
            with self._start_wait:
                if not self._started:
                    self._proxy_storage._start_proxy(self)
                    self._started = True
                self._ref_count += 1

        def remove_ref(self):
            """Drop a reference; the controller closes the proxy at zero."""
            self._proxy_storage._release_proxy(self)

    class ProxyHolderRef(object):
        """Handle to a shared proxy; usable as a context manager."""
        _proxy_holder: object  # ProxyController.ProxyHolder

        def __init__(self, proxy_holder: object):
            # Keep the holder unset until the reference is really taken:
            # if add_ref() raises, __del__/release() must not give back a
            # reference that was never acquired.
            self._proxy_holder = None
            proxy_holder.add_ref()
            self._proxy_holder = proxy_holder

        def local_port(self):
            return self._proxy_holder._local_port

        def url(self):
            return self._proxy_holder._url

        def is_alive(self):
            """Return True while a proxy process is attached to the holder."""
            # NOTE(review): this does not poll the process, so a proxy that
            # died on its own is still reported as alive.
            return (
                self._proxy_holder is not None and
                self._proxy_holder._process is not None)

        def release(self):
            """Give the reference back; safe to call more than once."""
            if self._proxy_holder:
                self._proxy_holder.remove_ref()
                self._proxy_holder = None

        def __enter__(self):
            return self

        def __exit__(self, type, value, traceback):
            self.release()
            return False

        def __del__(self):
            self.release()

    # [start_port .. end_port]: locally started proxies use ports of this interval.
    def __init__(
        self,
        start_port=10000,
        end_port=20000,
        command="gost -L=socks5://127.0.0.1:{{LOCAL_PORT}} -F='{{UPSTREAM_URL}}'"
    ):
        """Create a controller.

        Args:
            start_port: First port (inclusive) usable by local proxies.
            end_port: Last port (inclusive) usable by local proxies.
            command: Jinja2 template of the proxy command line; it is
                rendered with ``LOCAL_PORT`` and ``UPSTREAM_URL``.
        """
        self._proxy_cmd_template = jinja2.Environment().from_string(command)
        self._lock = threading.Lock()
        self._proxies_by_url = {}
        self._proxies_by_port = {}
        self._start_port = start_port
        self._end_port = end_port

    def get_proxy(self, url):
        """Return a ``ProxyHolderRef`` for ``url``, starting a proxy if needed.

        Raises:
            ProxyController.PortBusy, ProxyController.NoPortForListen: see
                ``_choose_port``.
            Exception: whatever starting the proxy process raises; in that
                case nothing stays registered for ``url``.
        """
        with self._lock:
            proxy_holder = self._proxies_by_url.get(url)
            if proxy_holder is not None:
                # Start/wait start or simple increase ref.
                return ProxyController.ProxyHolderRef(proxy_holder)

            local_port = self._choose_port(url)
            proxy_holder = ProxyController.ProxyHolder(self, local_port, url)
            self._proxies_by_url[url] = proxy_holder
            self._proxies_by_port[local_port] = proxy_holder
            try:
                return ProxyController.ProxyHolderRef(proxy_holder)
            except BaseException:
                # The proxy could not be started: do not keep a dead holder
                # (and its port) registered forever.
                self._unregister_proxy(proxy_holder)
                raise

    def opened_proxies_count(self):
        """Return the number of currently registered proxies."""
        return len(self._proxies_by_port)

    @staticmethod
    def _port_is_listen(port):
        """Return True if a TCP connect to 127.0.0.1:``port`` succeeds."""
        with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            try:
                result = sock.connect_ex(("127.0.0.1", port))
                return result == 0
            except socket.gaierror:
                return False

    def _choose_port(self, url):
        """Pick a port for ``url``; the caller must hold ``_lock``.

        The search starts at an offset derived from ``hash(url)`` and skips
        ports already used by this controller.

        Raises:
            ProxyController.PortBusy: the first candidate port is listened on
                by something else.
            ProxyController.NoPortForListen: all ports are in use.
        """
        ports_count = self._end_port - self._start_port + 1
        base_port_offset = hash(url) % ports_count
        for port_offset in range(ports_count):
            check_port = self._start_port + (base_port_offset + port_offset) % ports_count
            if check_port in self._proxies_by_port:
                continue
            if ProxyController._port_is_listen(check_port):
                raise ProxyController.PortBusy(
                    "Port " + str(check_port) + " dedicated for proxy usage is busy.")
            return check_port
        raise ProxyController.NoPortForListen()

    def _start_proxy(self, proxy_holder):
        """Render the command template and start the proxy process."""
        proxy_cmd = self._proxy_cmd_template.render({
            'LOCAL_PORT': str(proxy_holder._local_port),
            'UPSTREAM_URL': proxy_holder._url})
        logger.info("Start with: " + str(proxy_cmd))
        proxy_holder._process = subprocess.Popen(
            oslex.split(proxy_cmd), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def _release_proxy(self, proxy_holder):
        """Drop one reference of ``proxy_holder``; close it when none is left.

        The decrement and the unregistration happen under ``_lock``, the same
        lock ``get_proxy`` holds while handing out references, so a holder
        can not be closed right after another thread acquired it.
        """
        with self._lock:
            with proxy_holder._start_wait:
                proxy_holder._ref_count -= 1
                if proxy_holder._ref_count != 0:
                    return
            self._unregister_proxy(proxy_holder)
        # Kill the process outside of the lock.
        self._stop_proxy_process(proxy_holder)

    def _unregister_proxy(self, proxy_holder):
        """Remove ``proxy_holder`` from both registries (``_lock`` is held)."""
        if self._proxies_by_url.get(proxy_holder._url) is proxy_holder:
            del self._proxies_by_url[proxy_holder._url]
        if self._proxies_by_port.get(proxy_holder._local_port) is proxy_holder:
            del self._proxies_by_port[proxy_holder._local_port]

    def _stop_proxy_process(self, proxy_holder):
        """Kill the proxy process of ``proxy_holder`` if there is one."""
        if proxy_holder._process:
            logger.info("Close proxy for: " + str(proxy_holder._url))
            proxy_holder._process.kill()
            proxy_holder._process.wait()
            proxy_holder._process = None

    def _close_proxy(self, proxy_holder):
        """Unregister ``proxy_holder`` and kill its process unconditionally."""
        with self._lock:
            self._unregister_proxy(proxy_holder)
        self._stop_proxy_process(proxy_holder)
156 |
--------------------------------------------------------------------------------
/docker/rootfs/opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverRun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
# Smoke test of the Chrome installation.
# Starts Xvfb, launches headless Chrome ($CHROME_BIN) against
# https://www.google.com and waits for a screenshot file to appear.
# All working files live under /tmp/chrome_testing_run/, which is recreated.
# Returns 0 if the screenshot was produced, 1 otherwise (including the case
# when Xvfb is not running one second after its start).
chrome_diagnostic() {
  rm -rf /tmp/chrome_testing_run/
  mkdir -p /tmp/chrome_testing_run/user_data
  XVFB_OUTPUT_FILE="/tmp/chrome_testing_run/xvfb.out"
  CHROME_OUTPUT_FILE="/tmp/chrome_testing_run/chrome.out"
  SCREENSHOT_FILE="/tmp/chrome_testing_run/screenshot.png"
  USER_DATA_DIR="/tmp/chrome_testing_run/user_data"

  Xvfb :99999 >"$XVFB_OUTPUT_FILE" 2>&1 &
  XVFB_PID=$!

  # Give Xvfb a moment to start, then check that it is still running.
  sleep 1
  if ! ps -p "$XVFB_PID" > /dev/null; then
    echo "Chrome diagnostic failed (Xvfb running)" >&2
    cat "$XVFB_OUTPUT_FILE" >&2
    return 1
  fi

  # Run Chrome in background; its output is kept for the failure report.
  # NOTE(review): --user-data-dir, --disable-features and
  # --disable-session-crashed-bubble are passed twice; presumably the later
  # occurrence wins - confirm and drop the duplicates.
  "$CHROME_BIN" '--remote-allow-origins=*' \
    --no-first-run \
    --no-service-autorun \
    --no-default-browser-check \
    --homepage=about:blank \
    --no-pings \
    --password-store=basic \
    --disable-infobars \
    --disable-breakpad \
    --disable-component-update \
    --disable-backgrounding-occluded-windows \
    --disable-renderer-backgrounding \
    --disable-background-networking \
    --disable-dev-shm-usage \
    --disable-features=IsolateOrigins,site-per-process \
    --disable-session-crashed-bubble \
    --disable-search-engine-choice-screen \
    --user-data-dir=/tmp/chrome_testing_run/ \
    --disable-features=IsolateOrigins,site-per-process \
    --disable-session-crashed-bubble \
    --no-sandbox \
    --remote-debugging-host=127.0.0.1 \
    --remote-debugging-port=44444 \
    --user-data-dir="$USER_DATA_DIR" \
    --timeout=60 \
    --window-size=1920,1200 \
    --headless \
    --screenshot="$SCREENSHOT_FILE" \
    "https://www.google.com" \
    >"$CHROME_OUTPUT_FILE" 2>&1 &
  CHROME_PID=$!

  START_TIME=$(date +%s)
  WAIT_TIMEOUT=30
  EXIT_CODE=1

  # Poll once per second until the screenshot exists, Chrome has exited,
  # or more than WAIT_TIMEOUT seconds have passed.
  while true
  do
    CUR_TIME=$(date +%s)
    if [[ $((CUR_TIME - START_TIME)) -gt "$WAIT_TIMEOUT" ]]; then
      break
    fi
    if ! ps -p "$CHROME_PID" > /dev/null; then
      break
    fi
    if [ -f "$SCREENSHOT_FILE" ]; then
      EXIT_CODE=0
      break
    fi
    sleep 1
  done

  # Chrome may have written the screenshot right before it exited.
  if [ -f "$SCREENSHOT_FILE" ]; then
    EXIT_CODE=0
  fi

  if [[ $EXIT_CODE == 0 ]]
  then
    echo "Chrome diagnostic success"
  else
    echo "Chrome diagnostic failed (chrome running)" >&2
    cat "$CHROME_OUTPUT_FILE" >&2
  fi

  # Stop both background processes and reap them.
  kill "$CHROME_PID" 2>/dev/null
  wait "$CHROME_PID"

  kill "$XVFB_PID" 2>/dev/null
  wait "$XVFB_PID"

  return $EXIT_CODE
}
93 |
# Main flow of the container start script: prepare the workspace, start the
# helper services in background and run the solver server in foreground.
set -o pipefail

CURRENT_UID=$(id -u)
CURRENT_GID=$(id -g)

export IN_DOCKER=true
export WORKSPACE_ROOT=/opt/yandex_captcha_puzzle_solver/var/
export PYTHONPATH=$PYTHONPATH:/opt/yandex_captcha_puzzle_solver/lib/
CHROME_BIN=$(which chrome || which chromium)

if [ "$CHROME_BIN" = "" ] ; then
  echo "Can't find chrome executable" >&2
  exit 1
fi

# Make the current user the owner of everything under the workspace.
sudo -n find "$WORKSPACE_ROOT" -exec chown "$CURRENT_UID:$CURRENT_GID" {} \;
mkdir -p "$WORKSPACE_ROOT/log"

# Not critical - this only keeps chrome happy and silences some of its errors.
# Start dbus to avoid these chrome errors:
# Failed to connect to the bus: Failed to connect to socket /run/dbus/system_bus_socket: No such file or directory
# Failed to connect to the bus: Could not parse server address: Unknown address type
# NOTE(review): XDG_RUNTIME_DIR and DBUS_SESSION_BUS_ADDRESS are set but not
# exported here, so child processes only see them if they are exported
# elsewhere - confirm whether an export is intended.
XDG_RUNTIME_DIR=/run/xdg/
sudo bash -c "
sudo service dbus start
mkdir -p '$XDG_RUNTIME_DIR'
chmod 700 '$XDG_RUNTIME_DIR'
chown '$(id -un):$(id -gn)' '$XDG_RUNTIME_DIR'"
DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus"
dbus-daemon --session --address="$DBUS_SESSION_BUS_ADDRESS" --nofork --nopidfile --syslog-only &

# Run the diagnostic if requested (CHECK_SYSTEM=true); stop on failure.
if [ "$CHECK_SYSTEM" = true ] ; then
  chrome_diagnostic || exit 1
fi

# Start the grounding server - a web server that fills the fake captcha form.
python3 /opt/yandex_captcha_puzzle_solver/bin/grounding_server/grounding_server.py \
  --port=9001 \
  --page-template=/opt/yandex_captcha_puzzle_solver/etc/html_templates/index.html.j2 \
  --form-page-template=/opt/yandex_captcha_puzzle_solver/etc/html_templates/form.html.j2 \
  >"$WORKSPACE_ROOT/log/grounding_server.log" 2>&1 &

# Bring up the default proxy (port 10000), used to solve requests that do not
# define a proxy themselves.
bash /opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh \
  10000 "http://localhost:9001" "" "$WORKSPACE_ROOT/log/" \
  >"$WORKSPACE_ROOT/log/yandex_proxy_run.out" 2>&1 &

# Run the service; optional flags are driven by environment variables.
ADD_PARAMS=""
if [ "$CHROME_DISABLE_GPU" = true ] ; then
  ADD_PARAMS="$ADD_PARAMS --disable-gpu"
fi

if [ "$VERBOSE" = true ] ; then
  ADD_PARAMS="$ADD_PARAMS --verbose"
fi

if [ "$DEBUG" = true ] ; then
  mkdir -p "$WORKSPACE_ROOT/debug"
  ADD_PARAMS="$ADD_PARAMS --debug-dir=$WORKSPACE_ROOT/debug"
fi

# Report the installed package version and the chrome version.
echo "Run server $(pip show yandex-captcha-puzzle-solver | grep Version | awk '{print $2}'
), chrome: $("$CHROME_BIN" --version)"

# ADD_PARAMS is intentionally unquoted so that it expands to separate arguments.
# Per-request proxies listen on ports 10001..20000; output is also written to the log.
yandex_captcha_puzzle_solve_server \
  -b 0.0.0.0:8080 \
  --proxy http://127.0.0.1:10000 \
  --proxy-listen-start-port 10001 \
  --proxy-listen-end-port 20000 \
  --proxy-command 'bash /opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverProxyRun.sh {{LOCAL_PORT}} "http://localhost:9001" "{{UPSTREAM_URL}}"' \
  $ADD_PARAMS \
  2>&1 | \
  tee "$WORKSPACE_ROOT/log/yandex_captcha_puzzle_solver.log"
169 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PYTHON_VERSION=3.11
2 |
3 | FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
4 |
5 | ARG CHROME_VERSION=""
6 |
7 | WORKDIR /app/
8 |
9 | ENV PACKAGES_DIR=/packages
10 |
11 | # Build dummy packages to skip installing them and their dependencies
12 | RUN mkdir -p "${PACKAGES_DIR}" \
13 | && apt-get update \
14 | && apt-get install -y --no-install-recommends equivs \
15 | && equivs-control libgl1-mesa-dri \
16 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: libgl1-mesa-dri\nVersion: 99.0.0\nDescription: Dummy package for libgl1-mesa-dri\n' >> libgl1-mesa-dri \
17 | && equivs-build libgl1-mesa-dri \
18 | && mv libgl1-mesa-dri_*.deb ${PACKAGES_DIR}/libgl1-mesa-dri.deb \
19 | && equivs-control adwaita-icon-theme \
20 | && printf 'Section: misc\nPriority: optional\nStandards-Version: 3.9.2\nPackage: adwaita-icon-theme\nVersion: 99.0.0\nDescription: Dummy package for adwaita-icon-theme\n' >> adwaita-icon-theme \
21 | && equivs-build adwaita-icon-theme \
22 | && mv adwaita-icon-theme_*.deb ${PACKAGES_DIR}/adwaita-icon-theme.deb
23 |
24 | # Install gost proxy (used to process requests through proxies that require authorization)
25 | RUN apt-get install -y --no-install-recommends curl # gost-install.sh requirement
26 | COPY utils/gost-install.sh ./gost-install.sh
27 | RUN chmod +x ./gost-install.sh && bash -c "./gost-install.sh --install"
28 |
29 | COPY utils/linux_chrome_archive_installer.py ./linux_chrome_archive_installer.py
30 | COPY utils/linux_chrome_deb_repo_installer.sh ./linux_chrome_deb_repo_installer.sh
31 |
32 | # If CHROME_VERSION isn't explicitly defined, use the version tested for the platform.
33 | RUN if [ "$CHROME_VERSION" = "" ] ; then \
34 | BUILD_ARCH="$(arch)" ; \
35 | if [ "$BUILD_ARCH" = "arm64" ] ; then echo 'CHROME_VERSION="120."' >>/tmp/build.env ; \
36 | elif [ "$BUILD_ARCH" = "aarch64" -o "$BUILD_ARCH" = "armv7l" ] ; then echo 'CHROME_VERSION="130."' >>/tmp/build.env ; \
37 | else echo 'CHROME_VERSION="131."' >>/tmp/build.env ; \
38 | fi ; \
39 | else echo 'CHROME_VERSION="'"$CHROME_VERSION"'"' >>/tmp/build.env ; \
40 | fi
41 |
42 | # We prefer the version from the archive because it performs better (faster start),
43 | # but no archive builds are available for ARM.
44 | RUN . /tmp/build.env ; if [ "$(arch)" != "x86_64" ] ; then \
45 | echo "To install chrome($CHROME_VERSION) from google repository (no archive versions for ARM)" ; \
46 | chmod +x ./linux_chrome_deb_repo_installer.sh ; \
47 | bash -c "./linux_chrome_deb_repo_installer.sh /opt/yandex_captcha_puzzle_solver/installed_chrome/ '$CHROME_VERSION'" || \
48 | { echo "Can't install chrome (required version '$CHROME_VERSION')" >&2 ; exit 1 ; } ; \
49 | else \
50 | echo "To install chrome($CHROME_VERSION) from archive" ; \
51 | mkdir -p /opt/yandex_captcha_puzzle_solver/installed_chrome/usr/bin/ ; \
52 | python3 ./linux_chrome_archive_installer.py \
53 | --version-prefix="$CHROME_VERSION" \
54 | --install-root=/opt/yandex_captcha_puzzle_solver/installed_chrome/usr/bin/ \
55 | --arch=$(arch) || \
56 | { echo "Can't install chrome (required version '$CHROME_VERSION')" >&2 ; exit 1 ; } ; \
57 | fi
58 |
59 |
60 | FROM python:${PYTHON_VERSION}-slim-bookworm
61 |
62 | ARG UID=1111
63 | ARG GID=0
64 | ARG UNAME=yandex_captcha_puzzle_solver
65 | ARG CHECK_SYSTEM=false
66 | ARG CHROME_DISABLE_GPU=false
67 |
68 | ENV PACKAGES_DIR=/packages
69 | ENV CHECK_SYSTEM=${CHECK_SYSTEM}
70 | ENV CHROME_DISABLE_GPU=${CHROME_DISABLE_GPU}
71 | ENV DEBUG=false
72 | ENV VERBOSE=false
73 | ENV PYTHONPATH=/usr/lib/python3/dist-packages/
74 |
75 | # Copy dummy packages
76 | COPY --from=builder ${PACKAGES_DIR} ${PACKAGES_DIR}
77 | COPY --from=builder /usr/local/bin/gost /usr/local/bin/gost
78 |
79 | # Copy installed chrome
80 | COPY --from=builder /opt/yandex_captcha_puzzle_solver/installed_chrome /
81 |
82 | # Install dependencies and create user
83 | # You can test Chromium running this command inside the container:
84 | # xvfb-run -s "-screen 0 1600x1200x24" chromium --no-sandbox
85 | # The error trace looks like this: "*** stack smashing detected ***: terminated"
86 | # To check the package versions available you can use this command:
87 | # apt-cache madison chromium
88 |
89 | # Install dummy packages
90 | RUN dpkg -i ${PACKAGES_DIR}/*.deb \
91 | # Install dependencies
92 | && apt-get update \
93 | && apt-get install -y --no-install-recommends \
94 | $(apt-cache depends chromium | grep Depends | sed "s/.*ends:\ //" | grep -v -E '^<.*>$' | tr '\n' ' ') \
95 | && apt-get install -y --no-install-recommends \
96 | xvfb dumb-init procps curl vim xauth sudo git \
97 | # Remove temporary files and hardware decoding libraries
98 | && rm -rf /var/lib/apt/lists/* \
99 | && find /usr/lib/ -type f -name 'libmfxhw*' -delete \
100 | && find /usr/lib/ -type d -name mfx -exec rm -rf {} \; \
101 | && mkdir -p /app/bin/
102 |
103 | RUN mkdir -p "/app/.config/chromium/Crash Reports/pending"
104 |
105 | RUN if [ "$UID" -ne 0 ] ; then echo '%sudo ALL=(ALL:ALL) NOPASSWD:ALL' >/etc/sudoers.d/nopasswd \
106 | && adduser --disabled-password --gecos '' --uid "${UID}" --gid "${GID}" --shell /bin/bash ${UNAME} \
107 | && adduser ${UNAME} sudo \
108 | && chown -R ${UNAME} /app/ \
109 | && mkdir -p /opt/yandex_captcha_puzzle_solver/var/ \
110 | && chown -R ${UNAME} /opt/yandex_captcha_puzzle_solver/var/ ; \
111 | fi
112 |
113 | WORKDIR /app
114 |
115 | RUN apt-get update && \
116 | apt install -y --no-install-recommends python3-opencv python3-numpy python3-cffi
117 |
118 | COPY . yandex_captcha_puzzle_solver
119 | RUN ADDITIONAL_PYTHONPATH="$PYTHONPATH" pip install --prefer-binary yandex_captcha_puzzle_solver/
120 |
121 | COPY src/grounding_server /opt/yandex_captcha_puzzle_solver/bin/grounding_server
122 | COPY src/mitm_addons /opt/yandex_captcha_puzzle_solver/lib/mitm_addons
123 | COPY docker/rootfs /
124 | COPY docker/requirements.txt /app/
125 | RUN pip install -r /app/requirements.txt
126 |
127 | USER ${UID}
128 |
129 | # dumb-init avoids zombie chromium processes
130 | ENTRYPOINT ["/usr/bin/dumb-init", "--"]
131 | CMD ["/bin/bash", "-c", "/opt/yandex_captcha_puzzle_solver/bin/YandexCaptchaPuzzleSolverRun.sh"]
132 |
--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yml:
--------------------------------------------------------------------------------
1 | name: Docker
2 |
3 | # This workflow uses actions that are not certified by GitHub.
4 | # They are provided by a third-party and are governed by
5 | # separate terms of service, privacy policy, and support
6 | # documentation.
7 |
8 | on:
9 | push:
10 | branches: [ "main" ]
11 | # Publish semver tags as releases.
12 | tags: [ 'v*.*.*' ]
13 | workflow_dispatch:
14 |
15 | env:
16 | # Use docker.io for Docker Hub if empty
17 | REGISTRY: ghcr.io
18 | # github.repository as <account>/<repo>
19 | IMAGE_NAME: ${{ github.repository }}
20 |
21 |
22 | jobs:
23 | build:
24 | strategy:
25 | fail-fast: false
26 | matrix:
27 | include:
28 | - builder: ubuntu-latest
29 | platform: linux/x86_64
30 | - builder: ubuntu-latest
31 | platform: linux/amd64
32 | - builder: ubuntu-latest
33 | platform: linux/arm64
34 | - builder: ubuntu-latest
35 | platform: linux/aarch64
36 | - builder: ubuntu-latest
37 | platform: linux/arm/v7
38 | - builder: ubuntu-latest
39 | platform: linux/arm/v8
40 | #- builder: ubuntu-latest # No chrome
41 | # platform: linux/arm/v6
42 | #- builder: ubuntu-latest # No gost, no chrome
43 | # platform: linux/s390x
44 | #- builder: ubuntu-latest # No gost, custom chrome
45 | # platform: linux/ppc64le
46 |
47 | runs-on: ${{ matrix.builder }}
48 |
49 | permissions:
50 | contents: read
51 | packages: write
52 | id-token: write
53 |
54 | steps:
55 | - name: Prepare
56 | run: |
57 | platform=${{ matrix.platform }}
58 | echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
59 |
60 | - name: Checkout repository
61 | uses: actions/checkout@v4
62 |
63 | # Install the cosign tool except on PR
64 | # https://github.com/sigstore/cosign-installer
65 | - name: Install cosign
66 | uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0
67 | with:
68 | cosign-release: 'v2.2.4'
69 |
70 | - name: Set up QEMU
71 | uses: docker/setup-qemu-action@v3
72 |
73 | # Set up BuildKit Docker container builder to be able to build
74 | # multi-platform images and export cache
75 | # https://github.com/docker/setup-buildx-action
76 | - name: Set up Docker Buildx
77 | uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
78 |
79 | # Log in to the registry (env.REGISTRY) so that images can be pushed by digest
80 | - name: Login to dockerhub
81 | uses: docker/login-action@v3
82 | with:
83 | registry: ${{ env.REGISTRY }}
84 | username: ${{ github.actor }}
85 | password: ${{ secrets.GITHUB_TOKEN }}
86 |
87 | # Extract metadata (tags, labels) for Docker
88 | # https://github.com/docker/metadata-action
89 | - name: Extract Docker metadata
90 | id: meta
91 | uses: docker/metadata-action@v5
92 | with:
93 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
94 |
95 | - name: Build and push by digest
96 | id: build
97 | uses: docker/build-push-action@v6
98 | with:
99 | context: .
100 | platforms: ${{ matrix.platform }}
101 | labels: ${{ steps.meta.outputs.labels }}
102 | outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
103 |
104 | - name: Export digest
105 | run: |
106 | mkdir -p /tmp/digests
107 | digest="${{ steps.build.outputs.digest }}"
108 | touch "/tmp/digests/${digest#sha256:}"
109 |
110 | - name: Upload digest
111 | uses: actions/upload-artifact@v4
112 | with:
113 | name: digests-${{ env.PLATFORM_PAIR }}
114 | path: /tmp/digests/*
115 | if-no-files-found: error
116 | retention-days: 1
117 |
118 | # merge images to one multi-platform image
119 | merge:
120 | needs:
121 | - build
122 |
123 | runs-on: ubuntu-latest
124 |
125 | permissions:
126 | contents: read
127 | packages: write
128 | id-token: write
129 |
130 | steps:
131 | - name: Download digests
132 | uses: actions/download-artifact@v4
133 | with:
134 | path: /tmp/digests
135 | pattern: digests-*
136 | merge-multiple: true
137 |
138 | - name: Set up Docker Buildx
139 | uses: docker/setup-buildx-action@v3
140 |
141 | - name: Docker meta
142 | id: meta
143 | uses: docker/metadata-action@v5
144 | with:
145 | images: ${{ env.IMAGE_NAME }}
146 | tags: |
147 | type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
148 | type=ref,enable=true,priority=600,prefix=,suffix=,event=tag
149 | type=ref,enable=true,priority=600,prefix=,suffix=,event=branch
150 | labels: |
151 | org.opencontainers.image.title=yandex-captcha-puzzle-solver
152 | org.opencontainers.image.description=Yandex Captcha Puzzle Solve Server
153 | org.opencontainers.image.vendor=yuri.kuznecov@gmail.com
154 |
155 | - name: Login to dockerhub
156 | uses: docker/login-action@v3
157 | with:
158 | registry: ${{ env.REGISTRY }}
159 | username: ${{ github.actor }}
160 | password: ${{ secrets.GITHUB_TOKEN }}
161 |
162 | - name: Create manifest list and push
163 | working-directory: /tmp/digests
164 | run: |
165 | docker buildx imagetools create $(jq -cr '.tags | map("-t ${{ env.REGISTRY }}/" + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
166 | $(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *)
167 |
168 | - name: Inspect image
169 | run: |
170 | set -e
171 | set -o pipefail
172 | echo "${{ steps.meta.outputs.tags }}" | sed -r 's/[ ]+/ /g' | tr ' ' '\n' | sed -r 's|^|${{ env.REGISTRY }}/|' | \
173 | while read IMAGE_NAME ; do \
174 | docker buildx imagetools inspect "$IMAGE_NAME" || exit 1 ; \
175 | done
176 |
177 | # TODO: publish to docker.io
178 |
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/browser_wrapper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import typing
4 | import traceback
5 | import asyncio
6 | import uuid
7 | import http.cookiejar
8 | import shutil
9 | import logging
10 | import numpy as np
11 |
12 | import cv2
13 |
14 | import zendriver_flare_bypasser as zendriver
15 |
16 | XVFB_DISPLAY = None
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
class Rect(object):
    """Bounding box of a page element.

    Instances are created empty; ``get_element_screenshot`` fills the fields
    from the element position reported by the driver.
    """
    # NOTE(review): values are copied from the driver position as-is and may
    # be floats despite the ``int`` annotations - confirm.
    left: int
    top: int
    width: int
    height: int
26 |
27 |
28 | """
29 | Thin wrapper around the browser (driver).
30 | It keeps the driver-specific operations and requirements in one place
31 | and simplifies migration to another driver.
32 | """
33 |
34 |
35 | class BrowserWrapper(object):
36 | _zendriver_driver: zendriver.Browser = None
37 | _page = None
38 |
class FakePosition(object):
    """Minimal position object: only exposes ``center`` as a tuple of floats."""
    center = None

    def __init__(self, center):
        # Every coordinate is converted with float().
        self.center = tuple(map(float, center))
44 |
class FakeNode(object):
    """Placeholder passed to ``FakeElement`` where the driver expects a DOM node."""
    # The only member is ``attributes``, left as None.
    attributes = None
47 |
class FakeElement(zendriver.Element):
    """Element without a real DOM node, used to click at fixed coordinates.

    It reports a preset position and disables everything that would need a
    real node, so that ``zendriver.Element.mouse_click`` only sends the
    cdp click.
    """
    _position = None

    def __init__(self, page: zendriver.Tab, center_coords):
        placeholder_node = BrowserWrapper.FakeNode()  # stands in for zendriver.cdp.dom.Node
        super().__init__(placeholder_node, page)
        self._position = BrowserWrapper.FakePosition(center_coords)

    def _make_attrs(self):
        # Overridden to avoid an exception in __init__ (there is no real node).
        return None

    async def get_position(self):
        # Always report the preset coordinates.
        return self._position

    async def flash(self, duration: typing.Union[float, int] = 0.5):
        # Nothing to highlight for a fake element.
        return None
67 |
def __init__(self, zendriver_driver: zendriver.Browser, user_data_dir: str = None):
    """Wrap a started driver.

    ``user_data_dir`` (optional) is the browser profile directory; it is
    removed by ``close()`` and ``__del__``.
    """
    self._zendriver_driver = zendriver_driver
    self._user_data_dir = user_data_dir
71 |
def __del__(self):
    # Last-resort cleanup of the profile directory when close() was not called;
    # removal errors are ignored.
    if self._user_data_dir:
        shutil.rmtree(self._user_data_dir, ignore_errors=True)
75 |
@staticmethod
def start_xvfb_display():
    """Start the shared Xvfb display once per process (no-op on Windows)."""
    global XVFB_DISPLAY
    if sys.platform == 'win32':
        return
    if XVFB_DISPLAY is not None:
        return  # already created by an earlier call
    # Imported lazily: only reached when the platform is not win32.
    from xvfbwrapper import Xvfb
    XVFB_DISPLAY = Xvfb()
    XVFB_DISPLAY.start()
84 |
@staticmethod
async def create(proxy = None, disable_gpu = False):
    """Start a browser with its own profile directory and wrap it.

    Args:
        proxy: Optional value for chrome's ``--proxy-server`` argument.
        disable_gpu: Add ``--disable-gpu`` and
            ``--disable-software-rasterizer`` when true.

    Returns:
        A ``BrowserWrapper`` that owns the created profile directory.

    Raises:
        Whatever ``zendriver.start`` raises; the profile directory is
        removed in that case.
    """
    user_data_dir = os.path.join("/tmp", str(uuid.uuid4()))  # < Each created chrome should be isolated.
    BrowserWrapper.start_xvfb_display()
    browser_args = []
    if proxy:
        browser_args.append("--proxy-server=" + proxy)
    if disable_gpu:
        browser_args += [
            "--disable-gpu",
            "--disable-software-rasterizer"
        ]
    if sys.platform == 'win32':
        browser_args += ["--headless"]

    browser_args += ["--user-data-dir=" + user_data_dir]
    browser_args += ["--ignore-certificate-errors", "--ignore-urlfetcher-cert-requests"]

    try:
        zendriver_driver = await zendriver.start(
            sandbox=False,
            browser_args=browser_args
        )
    except BaseException:
        # Clean up only when the start failed.  On success the running
        # browser uses this directory and the wrapper removes it in
        # close()/__del__.
        shutil.rmtree(user_data_dir, ignore_errors=True)
        raise
    return BrowserWrapper(zendriver_driver, user_data_dir = user_data_dir)
111 |
# Get the original driver page implementation - meant only for user command
# specific implementations.
def get_driver(self):
    # Returns the current page (None until get() has opened one).
    return self._page
115 |
async def size(self):
    """Return ``(width, height)`` of a freshly taken page screenshot."""
    screenshot = await self.get_screenshot()
    rows, columns, _channels = screenshot.shape
    return columns, rows
120 |
async def get_outputs(self):
    """Return ``[stdout_bytes, stderr_bytes]`` of the browser, or None on any error."""
    try:
        outputs = await self._zendriver_driver.communicate()
        captured_stdout, captured_stderr = outputs
    except Exception:
        return None
    return [captured_stdout, captured_stderr]
127 |
async def current_url(self):
    """Return the ``url`` attribute of the current page."""
    return self._page.url
130 |
async def close(self):
    """Stop the browser and remove its profile directory.

    The directory is removed even when stopping the driver raises; the
    exception still propagates to the caller.
    """
    self._page = None
    try:
        if self._zendriver_driver:
            await self._zendriver_driver.stop()
    finally:
        # Do not leak the profile directory if stop() fails.
        if self._user_data_dir:
            shutil.rmtree(self._user_data_dir, ignore_errors=True)
            self._user_data_dir = None
138 |
async def select_text(self, css_selector):
    """Return the text of the first element matching ``css_selector``.

    The lookup does not wait (``timeout=0``).  Returns None when no element
    is found.
    """
    try:
        res = await self._page.select(css_selector, timeout=0)
    except asyncio.TimeoutError:
        return None
    # Same guard as get_element_screenshot applies after select().
    if res is None:
        return None
    return res.text
145 |
async def select_count(self, css_selector):
    """Return how many elements match ``css_selector`` (0 on timeout)."""
    try:
        # Select without waiting.
        matches = await self._page.select_all(css_selector, timeout=0)
    except asyncio.TimeoutError:
        return 0
    return len(matches)
151 |
async def get(self, url):
    """Open ``url`` and keep the resulting page as the current page."""
    # Only one page is used: close every tab except the first one
    # (closing the first tab would close the browser).
    for position, extra_tab in enumerate(self._zendriver_driver.tabs):
        if position == 0:
            continue
        await extra_tab.close()
    self._page = await self._zendriver_driver.get(url)
158 |
async def click(self, css_selector):
    """Click the first element matching ``css_selector``.

    The lookup does not wait (``timeout=0``).

    Returns:
        True if an element was found and clicked, False otherwise.
    """
    try:
        element = await self._page.select(css_selector, timeout=0)
    except asyncio.TimeoutError:
        return False
    # Same guard as get_element_screenshot applies after select().
    if element is None:
        return False
    await element.click()
    return True
166 |
async def click_coords(self, coords):
    """Click at page coordinates ``coords`` through a fake element.

    Workaround specific to zendriver: clicks by coordinates without
    patching the driver.  Failures are printed with the step they
    happened in and re-raised.
    """
    stage = "start"
    try:
        target = BrowserWrapper.FakeElement(self._page, coords)
        stage = "mouse_click"
        await target.mouse_click()
    except Exception as error:
        print(f"EXCEPTION on click_coords '{stage}': {error}")
        raise
178 |
async def mouse_down(self):
    """Press the mouse button on the current page; print and re-raise failures."""
    try:
        await self._page.mouse.down()
    except Exception as error:
        print(f"EXCEPTION on mouse_down: {error}:\n{traceback.format_exc()}")
        raise
185 |
async def mouse_up(self):
    """Release the mouse button on the current page; print and re-raise failures."""
    try:
        await self._page.mouse.up()
    except Exception as error:
        print(f"EXCEPTION on mouse_up: {error}")
        raise
192 |
async def mouse_move(self, coords):
    """Move the mouse to ``(coords[0], coords[1])``; print and re-raise failures."""
    try:
        target_x, target_y = coords[0], coords[1]
        await self._page.mouse.move(target_x, target_y)
    except Exception as error:
        print(f"EXCEPTION on mouse_move: {error}")
        raise
199 |
async def get_user_agent(self):
    """Return ``window.navigator.userAgent`` evaluated on the current page."""
    return await self._page.evaluate("window.navigator.userAgent")
202 |
async def get_dom(self):
    """Return the content of the current page, or "" when the driver gives None."""
    content = await self._page.get_content()
    if content is None:
        # zendriver returns None sometimes (on error)
        return ""
    return content
206 |
async def get_element_screenshot(self, css_selector) -> tuple[np.ndarray, Rect]:
    """Take a screenshot of the first element matching ``css_selector``.

    Returns:
        ``(image, rect)``: the screenshot as a cv2 image (numpy array) and
        the element's bounding box.  ``(None, None)`` is returned when the
        element is not present or is not ready yet (see the transient
        errors below).

    Raises:
        Exception: any driver error that is not one of the transient ones.
    """
    # Errors that only mean "try again later":
    #   "not finished loading yet"
    #   "Cannot take screenshot with 0 height." - element isn't loaded yet.
    #   "Could not find object with given id" - element disappeared, js on page changed DOM.
    #   "could not find position" - no position available for the element.
    transient_errors = (
        "not finished loading yet",
        "cannot take screenshot ",
        "could not find object",
        "could not find position",
    )

    try:
        element = await self._page.select(css_selector, timeout=0)
    except asyncio.TimeoutError:
        return (None, None)

    if element is None:
        return (None, None)

    tmp_file_path = os.path.join("/tmp", str(uuid.uuid4()) + ".jpg")
    try:
        # The position lookup is inside the guarded section too: it is the
        # call that can fail with "could not find position".
        try:
            logger.info("To get position for '" + css_selector + "'")
            pos = await element.get_position()  # abs=True doesn't work
        finally:
            logger.info("From get position for '" + css_selector + "'")
        if pos is None:
            # presumably the driver can return no position - treat as "not ready"
            return (None, None)

        rect = Rect()
        rect.left = pos.abs_x
        rect.top = pos.abs_y
        rect.width = pos.width
        rect.height = pos.height

        await element.save_screenshot(tmp_file_path)
        return cv2.imread(tmp_file_path), rect
    except Exception as e:
        msg = str(e).lower()
        if not any(marker in msg for marker in transient_errors):
            raise
        return (None, None)
    finally:
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
254 |
async def get_screenshot(self):
    """
    Take a screenshot of the whole page.

    Retries once per second while the driver reports that the page is
    "not finished loading yet".

    :return: screenshot as a cv2 image (numpy array), as read by ``cv2.imread``.
    :raises zendriver.core.connection.ProtocolException: for any other
        protocol error.
    """
    # Local import: keeps the block self-contained without touching file imports.
    import tempfile

    # One temporary file for all attempts, in the platform temp directory
    # (a hard-coded "/tmp" does not exist on win32, which the server supports).
    tmp_file_path = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) + ".jpg")
    try:
        while True:
            try:
                await self._page.save_screenshot(tmp_file_path)
                return cv2.imread(tmp_file_path)
            except zendriver.core.connection.ProtocolException as e:
                if "not finished loading yet" not in str(e):
                    raise
            # Page is still loading: wait a little and try again.
            await asyncio.sleep(1)
    finally:
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
270 |
async def save_screenshot(self, image_path):
    """
    Save a screenshot of the page to ``image_path``.

    Retries once per second while the driver reports that the page is
    "not finished loading yet"; any other protocol error is re-raised.
    """
    saved = False
    while not saved:
        try:
            await self._page.save_screenshot(image_path)
            saved = True
        except zendriver.core.connection.ProtocolException as err:
            if "not finished loading yet" in str(err):
                await asyncio.sleep(1)
            else:
                raise
280 |
async def set_cookies(self, cookies: list[dict]):
    """
    Install cookies into the browser.

    Each ``{"name": "...", "value": "...", ...}`` dict is converted to an
    ``http.cookiejar.Cookie``; the resulting jar is handed to the driver.
    Missing keys fall back to: port 443, path '/', secure False, everything
    else None.
    """
    jar = http.cookiejar.CookieJar()
    for cookie_fields in cookies:
        # TO CHECK, that all fields filled correctly.
        jar.set_cookie(http.cookiejar.Cookie(
            version=None,
            name=cookie_fields.get('name', None),
            value=cookie_fields.get('value', None),
            port=cookie_fields.get('port', 443),
            port_specified=None,
            domain=cookie_fields.get('domain', None),
            domain_specified=None,
            domain_initial_dot=None,
            path=cookie_fields.get('path', '/'),
            path_specified=None,
            secure=cookie_fields.get('secure', False),
            # here expected float seconds since epoch time.
            expires=cookie_fields.get('expires', None),
            discard=None,
            comment=None,
            comment_url=None,
            rest=None,
        ))
    await self._zendriver_driver.cookies.set_all(jar)
305 |
async def get_cookies(self) -> list[dict]:
    """
    Return the browser cookies as plain dicts.

    The driver is asked for cookies in requests format (http.cookiejar.Cookie
    objects); each is flattened to a dict with the keys
    name, value, port, domain, path and secure.
    """
    driver_cookies = await self._zendriver_driver.cookies.get_all(requests_cookie_format=True)
    return [
        {
            "name": item.name,
            "value": item.value,
            "port": item.port,
            "domain": item.domain,
            "path": item.path,
            "secure": item.secure,
        }
        for item in driver_cookies
    ]
321 |
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/yandex_captcha_puzzle_solve_server.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import re
4 | import typing
5 | import typing_extensions
6 | import datetime
7 | import copy
8 | import platform
9 | import uuid
10 | import pathlib
11 | import traceback
12 | import logging
13 | import argparse
14 | import urllib3.util
15 | import fastapi
16 | import pydantic
17 |
18 | import yandex_captcha_puzzle_solver
19 |
logger = logging.getLogger(__name__)

# Gunicorn is used everywhere except win32/cygwin; defining the
# YANDEX_SOLVER_USE_UVICORN environment variable (any value) switches to uvicorn.
USE_GUNICORN = not (
    sys.platform in ['win32', 'cygwin'] or 'YANDEX_SOLVER_USE_UVICORN' in os.environ
)

# Import only the server implementation that will actually be started.
if not USE_GUNICORN:
    import uvicorn.main
else:
    import gunicorn.app.wsgiapp
31 | # Remove requirement for Content-Type header presence.
32 |
33 |
class RemoveContentTypeRequirementMiddleware(object):
    """
    ASGI middleware that removes the requirement for a Content-Type header.

    For every scope that carries headers, the ``content-type`` header is set to
    ``application/json`` (replaced if present, appended if missing), so request
    bodies are always parsed as JSON. Scopes without headers (for example the
    ASGI ``lifespan`` scope) are passed through untouched.
    """

    def __init__(self, app):
        """:param app: the wrapped ASGI application."""
        self._app = app

    async def __call__(self, scope, receive, send):
        """Patch the request headers in ``scope`` and delegate to the wrapped app."""
        headers = scope.get("headers")
        if headers is not None:
            if not isinstance(headers, list):
                # The headers must be mutable to be patched in place.
                headers = list(headers)
                scope["headers"] = headers

            content_type_found = False
            for header_index, header in enumerate(headers):
                if not isinstance(header, tuple) or len(header) != 2:
                    # Unexpected headers format - don't touch anything.
                    content_type_found = True
                    break
                # Compare as bytes: decoding could fail on a non-UTF-8 header name.
                if header[0].lower() == b'content-type':
                    headers[header_index] = (b'content-type', b'application/json')
                    content_type_found = True
                    break
            if not content_type_found:
                headers.append((b'content-type', b'application/json'))

        return await self._app(scope, receive, send)
54 |
55 |
# FastAPI application; the interactive docs are served under /docs.
server = fastapi.FastAPI(
    openapi_url='/docs/openapi.json',
    docs_url='/docs',
    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
    tags_metadata=[]
)

server.add_middleware(RemoveContentTypeRequirementMiddleware)

# Description of the 'proxy' request parameter shown in the API docs.
PROXY_ANNOTATION = """Proxy in format: <protocol>://(<user>:<password>@)?<host>:<port> .
Examples: socks5://1.1.1.1:2000, http://user:password@1.1.1.1:8080.
For yandex solver compatibility allowed format:
{"url": "<protocol>://<host>:<port>", "username": "<username>", "password": "<password>"}
If you use proxy with authorization and use yandex-captcha-puzzle-solver as package, please,
read instructions - need to install gost."""

# Keyword arguments passed to yandex_captcha_puzzle_solver.Solver for every request;
# filled from the command line in server_run().
solver_args = {
    'proxy': None,
    'proxy_controller': None,
    'disable_gpu': False,
    'debug_dir': None
}
77 |
78 |
class ProxyModel(pydantic.BaseModel):
    # Proxy description in the object form accepted by the REST API.
    # All fields default to None, so they are declared Optional: an explicit
    # JSON null is then accepted the same way as an omitted field.
    url: typing.Optional[str] = pydantic.Field(default=None, description='Proxy url')
    username: typing.Optional[str] = pydantic.Field(
        default=None, description='Proxy authorization username'
    )
    password: typing.Optional[str] = pydantic.Field(
        default=None, description='Proxy authorization password'
    )
83 |
84 |
class CookieModel(pydantic.BaseModel):
    # Cookie as exchanged through the REST API (request input and solution output).
    name: str = pydantic.Field(description='Cookie name')
    value: str = pydantic.Field(description='Cookie value (empty string if no value)')
    # domain is required - we don't allow super cookies usage.
    domain: str = pydantic.Field(description='Cookie domain')
    port: typing.Union[int, None] = pydantic.Field(description='Cookie port', default=None)
    path: typing.Union[str, None] = pydantic.Field(description='Cookie path', default='/')
    secure: typing.Union[bool, None] = pydantic.Field(description='Cookie is secure', default=True)
    expires: typing.Union[int, None] = pydantic.Field(
        description='Cookie expire time in seconds after epoch start',
        default=None,
    )
95 |
96 |
class HandleCommandResponseSolution(pydantic.BaseModel):
    # Payload describing a solved challenge; nested into HandleCommandResponse.
    status: str
    url: str
    cookies: list[CookieModel] = pydantic.Field(
        description='Cookies got after solving',
        default=[],
    )
    user_agent: typing.Union[str, None] = None
    token: typing.Union[str, None] = None
103 |
104 |
class HandleCommandResponse(pydantic.BaseModel):
    # Top-level REST response: status and message, the processing time window
    # (timestamps as produced by datetime.timestamp) and, on success, the solution.
    status: str
    message: str
    startTimestamp: float
    endTimestamp: float
    solution: typing.Union[HandleCommandResponseSolution, None] = None
111 |
112 |
def _canonical_proxy_url(proxy: typing.Union[str, ProxyModel, None]) -> typing.Optional[str]:
    """
    Convert the proxy request parameter to its canonical string form.

    Strings (and None) are returned unchanged. A ProxyModel is rendered as
    ``scheme://[user[:password]@]host[:port]``; credentials come from the
    model's username/password and fall back to the ones embedded in its url.

    :raises ValueError: if the model url has no scheme or no host.
    """
    if proxy is None or isinstance(proxy, str):
        return proxy
    if proxy.url is None:
        return None

    parsed_proxy = urllib3.util.parse_url(proxy.url)
    if not parsed_proxy.scheme or not parsed_proxy.hostname:
        raise ValueError("Invalid proxy url (scheme and host are required): " + str(proxy.url))

    if proxy.username:
        credentials = proxy.username + ":" + (proxy.password if proxy.password else '') + '@'
    elif parsed_proxy.auth:
        # Keep credentials given inside the url instead of silently dropping them.
        credentials = parsed_proxy.auth + '@'
    else:
        credentials = ''

    return (
        parsed_proxy.scheme + "://" +
        credentials +
        parsed_proxy.hostname +
        (":" + str(parsed_proxy.port) if parsed_proxy.port else '')
    )


async def process_solve_request(
    url: str,
    yandex_key: str,
    cookies: list[CookieModel] = None,
    max_timeout: int = None,  # in msec.
    proxy: typing.Union[str, ProxyModel] = None,
):
    """
    Run the solver for one REST request and wrap the outcome into a response.

    :param url: url of the page with the challenge.
    :param yandex_key: yandex captcha key.
    :param cookies: cookies to install before solving (models or plain dicts).
    :param max_timeout: processing timeout in milliseconds; None keeps the
        solver request default.
    :param proxy: proxy url string or ProxyModel.
    :return: HandleCommandResponse with status "ok" and the solution, or with
        status "error" and the error text - errors are never propagated.
    """
    start_timestamp = datetime.datetime.timestamp(datetime.datetime.now())

    try:
        solve_request = yandex_captcha_puzzle_solver.Request()
        solve_request.yandex_key = yandex_key
        solve_request.url = url
        solve_request.cookies = [
            (cookie if isinstance(cookie, dict) else cookie.__dict__)
            for cookie in cookies
        ] if cookies else []
        if max_timeout is not None:
            # Request expects seconds, the API works with milliseconds.
            solve_request.max_timeout = max_timeout * 1.0 / 1000
        # Adapt proxy format for canonical representation (inside the try block,
        # so a malformed proxy yields an error response instead of an unhandled error).
        solve_request.proxy = _canonical_proxy_url(proxy)

        # Each request works on its own copy: debug_dir is replaced below.
        local_solver_args = copy.copy(solver_args)
        if local_solver_args['debug_dir']:
            debug_dir = os.path.join(local_solver_args['debug_dir'], str(uuid.uuid4()))
            pathlib.Path(debug_dir).mkdir(parents=True, exist_ok=True)
            local_solver_args['debug_dir'] = debug_dir
        solver = yandex_captcha_puzzle_solver.Solver(
            **local_solver_args)
        solve_response = await solver.solve(solve_request)

        # Response.message defaults to None while the response model requires a string.
        message = solve_response.message if solve_response.message is not None else ""

        return HandleCommandResponse(
            status="ok",
            message=message,
            startTimestamp=start_timestamp,
            endTimestamp=datetime.datetime.timestamp(datetime.datetime.now()),
            solution=HandleCommandResponseSolution(
                status="ok",
                url=solve_response.url,
                # Solver passes cookies as dicts (it doesn't know about the rest model).
                cookies=[
                    CookieModel(**cookie) for cookie in (solve_response.cookies or [])
                ],
                user_agent=solve_response.user_agent,
                # NOTE(review): the solution model declares no 'message' field -
                # confirm whether this value is meant to be exposed.
                message=message,
                token=solve_response.token
            )
        )

    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception("Solve request failed: %s", e)
        return HandleCommandResponse(
            status="error",
            message="Error: " + str(e),
            startTimestamp=start_timestamp,
            endTimestamp=datetime.datetime.timestamp(datetime.datetime.now()),
        )
186 |
187 |
188 | # REST API methods.
# POST /get_token: solve the captcha for the given page and return the token,
# cookies and user agent wrapped into HandleCommandResponse.
# All parameters are read from the JSON request body; fields that are None
# are omitted from the serialized response (response_model_exclude_none).
@server.post(
    "/get_token", response_model=HandleCommandResponse, tags=['Standard API'],
    response_model_exclude_none=True
)
async def Get_cookies_after_solve(
    url: typing_extensions.Annotated[
        str,
        fastapi.Body(description="Url for solve challenge.")
    ],
    yandex_key: typing_extensions.Annotated[
        str,
        fastapi.Body(description="Yandex captcha key")
    ],
    cookies: typing_extensions.Annotated[
        typing.List[CookieModel],
        fastapi.Body(description="Cookies to send.")
    ] = None,
    # Milliseconds; converted to seconds inside process_solve_request.
    maxTimeout: typing_extensions.Annotated[
        float,
        fastapi.Body(description="Max processing timeout in ms.")
    ] = 60000,
    # Either a proxy url string or an object matching ProxyModel.
    proxy: typing_extensions.Annotated[
        typing.Union[str, ProxyModel],
        fastapi.Body(description=PROXY_ANNOTATION)
    ] = None,
):
    # Thin adapter: map the API names (maxTimeout) to the internal ones.
    return await process_solve_request(
        url=url,
        yandex_key=yandex_key,
        cookies=cookies,
        max_timeout=maxTimeout,
        proxy=proxy,
    )
222 |
223 |
def server_run():
    """
    Command line entry point: configure logging and solver arguments, then
    start the HTTP server (gunicorn, or uvicorn when USE_GUNICORN is false).

    Known options are consumed here; unknown ones are passed to
    gunicorn/uvicorn as is. The function does not return: it always ends
    with sys.exit().
    """
    try:
        logging.basicConfig(
            format='%(asctime)s [%(name)s] [%(levelname)s]: %(message)s',
            handlers=[logging.StreamHandler(sys.stdout)],
            level=logging.INFO
        )

        logging.getLogger('urllib3').setLevel(logging.ERROR)

        logger.info(
            "Start yandex_captcha_puzzle_server:\n" +
            "  version: " + str(yandex_captcha_puzzle_solver.__version__) + "\n" +
            "  python version = " + ".".join([str(x) for x in list(sys.version_info)]) + "\n" +
            "  os = " + " ".join([platform.system(), platform.release(), platform.version()]) + "\n" +
            "  docker = " + os.environ.get('IN_DOCKER', "false") + "\n" +
            "  arch = " + str(platform.machine()) + "\n" +
            "  processor = " + str(platform.processor())
        )

        parser = argparse.ArgumentParser(
            description='Start yandex captcha puzzle solve server.',
            epilog='Other arguments will be passed to gunicorn or uvicorn(win32) as is.')
        parser.add_argument("-b", "--bind", type=str, default='127.0.0.1:8000')
        # < parse for pass to gunicorn as is and as "--host X --port X" to uvicorn
        parser.add_argument(
            "--proxy-listen-start-port", type=int, default=10000,
            help="""Port interval start, that can be used for up local proxies on request processing"""
        )
        parser.add_argument(
            "--proxy-listen-end-port", type=int, default=20000,
            help="""Port interval end for up local proxies"""
        )
        parser.add_argument(
            "--proxy-command", type=str,
            default=None,
            help="""command template (jinja2), that will be used for up proxy for process request
            with arguments: LOCAL_PORT, UPSTREAM_URL - proxy passed in request"""
        )
        parser.add_argument("--disable-gpu", action='store_true')
        parser.add_argument("--verbose", action='store_true')
        parser.add_argument(
            "--debug-dir", type=str, default=None,
            help="""directory for save intermediate DOM dumps and screenshots on solving,
            for each request will be created unique directory"""
        )
        parser.add_argument("--proxy", type=str)
        parser.set_defaults(disable_gpu=False, debug=False)
        args, unknown_args = parser.parse_known_args()

        # Split on the LAST colon so that addresses which themselves contain
        # colons (e.g. "[::1]:8000") are accepted.
        host, separator, port = args.bind.rpartition(':')
        if not separator:
            print("Invalid 'bind' argument value: " + str(args.bind), file=sys.stderr, flush=True)
            sys.exit(1)
        if host.startswith('[') and host.endswith(']'):
            # "--host" (uvicorn branch) gets the bare address, without brackets.
            host = host[1:-1]

        if args.verbose:
            logging.getLogger('zendriver.core.browser').setLevel(logging.DEBUG)
            logging.getLogger('yandex_captcha_puzzle_solver.yandex_captcha_puzzle_solver').setLevel(logging.DEBUG)
            logging.getLogger('uc.connection').setLevel(logging.INFO)

        global solver_args

        if args.debug_dir:
            logging.getLogger('yandex_captcha_puzzle_solver.yandex_captcha_puzzle_solver').setLevel(logging.DEBUG)
            solver_args['debug_dir'] = args.debug_dir

        # Rebuild argv for the server runner: program name (without the
        # "-script.pyw"/".exe" launcher suffix) plus the arguments not consumed here.
        sys.argv = [re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])]
        sys.argv += unknown_args

        solver_args['proxy'] = args.proxy

        # Init ProxyController
        solver_args['proxy_controller'] = yandex_captcha_puzzle_solver.proxy_controller.ProxyController(
            start_port=args.proxy_listen_start_port,
            end_port=args.proxy_listen_end_port,
            command=args.proxy_command)

        if args.disable_gpu:
            solver_args['disable_gpu'] = True

        if USE_GUNICORN:
            sys.argv += ['-b', args.bind]
            sys.argv += ['--worker-class', 'uvicorn.workers.UvicornWorker']
            sys.argv += ['yandex_captcha_puzzle_solver:server']
            sys.exit(gunicorn.app.wsgiapp.run())
        else:
            sys.argv += ['--host', host]
            sys.argv += ['--port', port]
            sys.argv += ['yandex_captcha_puzzle_solver:server']
            sys.exit(uvicorn.main.main())

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception, so normal
        # termination is not affected. Keep the traceback of real failures.
        logger.exception(str(e))
        sys.exit(1)


if __name__ == '__main__':
    server_run()
322 |
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/image_processor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import typing
4 | import enum
5 | import collections
6 | import math
7 | import numpy as np
8 |
9 | import cv2
10 |
11 |
class ImageProcessor(object):
    """
    Stateless OpenCV helpers used to analyse captcha screenshots.

    Images are 3-channel arrays indexed as ``image[y, x]`` (presumably BGR, as
    produced by ``cv2.imread`` - confirm against callers); points are ``(x, y)``
    tuples. When ``save_steps_dir`` is given, intermediate images are written
    into that directory for debugging.
    """

    class JointSegment(object):
        """
        Joint representation: an axis-aligned segment that starts at
        ``start_point`` (x, y) and runs ``length`` pixels to the right
        (HORIZONTAL) or down (VERTICAL).
        """

        class Type(enum.Enum):
            HORIZONTAL = 1
            VERTICAL = 2

        start_point: typing.Tuple[int, int]
        length: int
        type: Type

        def __init__(
            self,
            start_point: typing.Tuple[int, int] = None,
            length: int = None,
            type: Type = None
        ):
            self.start_point = start_point
            self.length = length
            self.type = type

    @staticmethod
    def _color_bounds(color, delta):
        """Return (lower, upper) per-channel bounds ``color -/+ delta`` clamped to [0, 255]."""
        lower = tuple(max(channel - delta, 0) for channel in color[:3])
        upper = tuple(min(channel + delta, 255) for channel in color[:3])
        return lower, upper

    @staticmethod
    def _joint_end_point(joint):
        """Return the (x, y) end point of a joint segment."""
        is_horizontal = joint.type == ImageProcessor.JointSegment.Type.HORIZONTAL
        is_vertical = joint.type == ImageProcessor.JointSegment.Type.VERTICAL
        return (
            joint.start_point[0] + (joint.length if is_horizontal else 0),
            joint.start_point[1] + (joint.length if is_vertical else 0),
        )

    @staticmethod
    def _joint_sample_point(joint, index, total):
        """Return the (x, y) point located ``index / total`` of the way along the joint."""
        is_horizontal = joint.type == ImageProcessor.JointSegment.Type.HORIZONTAL
        is_vertical = joint.type == ImageProcessor.JointSegment.Type.VERTICAL
        return (
            joint.start_point[0] + int(joint.length * index / total if is_horizontal else 0),
            joint.start_point[1] + int(joint.length * index / total if is_vertical else 0),
        )

    # Get rect of modal window (that contains challenge).
    @staticmethod
    def get_modal_frame_rect(
        image, color = (255, 255, 255),
        save_steps_dir: str = None,
        logger = None
    ):
        """
        Find the largest area filled with ``color`` (+/- 20 per channel).

        :return: ``(x, y, width, height)`` of the bounding box with the largest
            area, shrunk by 10 pixels in each dimension (5 from every side),
            or None if nothing matches the color. ``logger`` is unused.
        """
        color_delta = 20
        lower, upper = ImageProcessor._color_bounds(color, color_delta)
        mask = cv2.inRange(image, lower, upper)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        res_box = None
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if res_box is None or w * h > res_box[2] * res_box[3]:
                res_box = (x, y, w, h)

        if res_box is not None:
            # Step inside the frame so that its border is not part of the rect.
            compress_width = 10
            compress_height = 10
            res_box = (
                res_box[0] + int(compress_width / 2),
                res_box[1] + int(compress_height / 2),
                res_box[2] - compress_width,
                res_box[3] - compress_height
            )
            if save_steps_dir:
                debug_image = image.copy()
                debug_image = cv2.rectangle(
                    debug_image,
                    (res_box[0], res_box[1]),
                    (res_box[0] + res_box[2], res_box[1] + res_box[3]),
                    (0, 0, 255),
                    2
                )
                cv2.imwrite(os.path.join(save_steps_dir, 'rect.png'), debug_image)

        return res_box

    # Get slider points (for drag from to)
    @staticmethod
    def get_drag_points(image, logger = None, save_steps_dir: str = None, log_prefix = ''):
        """
        Locate the slider handle and pick points for a drag gesture.

        The handle is searched by color; the mask is dilated and then eroded
        to keep only solid areas. The press point is a random pixel of that
        area, the release point is another random pixel of it mirrored
        horizontally (``x -> image_width - x``).

        :return: ``[down_point, up_point]`` as (x, y) tuples of Python ints,
            or None if the slider color is not found. ``logger`` and
            ``log_prefix`` are unused.
        """
        _, image_width, _ = image.shape
        slider_color = (255, 130, 82)  # < BGR color of slider.
        slider_color_delta = 50
        lower, upper = ImageProcessor._color_bounds(slider_color, slider_color_delta)
        mask = cv2.inRange(image, lower, upper)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

        broad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        mask = cv2.dilate(mask, broad_kernel, iterations = 1)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'dilated_mask.png'), mask)

        erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
        mask = cv2.erode(mask, erode_kernel, iterations = 1)

        if save_steps_dir:
            cv2.imwrite(os.path.join(save_steps_dir, 'eroded_mask.png'), mask)

        # np.where returns (row indexes, column indexes), i.e. (y, x).
        rows, cols = np.where(mask >= 255)
        if len(rows) == 0:
            return None

        # Convert numpy scalars to plain ints: the points leave this module
        # and are also used as drawing coordinates.
        down_index = random.randint(0, len(rows) - 1)
        down_point = (int(cols[down_index]), int(rows[down_index]))
        up_index = random.randint(0, len(rows) - 1)
        up_point = (int(image_width - cols[up_index]), int(rows[up_index]))

        if save_steps_dir:
            debug_image = image.copy()
            debug_image = cv2.circle(debug_image, down_point, 5, (0, 0, 255), 2)
            debug_image = cv2.circle(debug_image, up_point, 5, (0, 0, 255), 2)
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_points.png'), debug_image)

        return [down_point, up_point]

    # Get puzzle joints for evaluate diff
    @staticmethod
    def get_puzzle_joints(
        image, logger = None, save_steps_dir: str = None, log_prefix = ''
    ) -> typing.List[typing.Tuple[JointSegment, JointSegment]]:
        """
        Build the pairs of segments lying on both sides of every inner puzzle joint.

        Each returned pair holds two parallel segments located 2 pixels away
        from a separator, one on each side of it; comparing the pixels under
        them (see evaluate_joints_diff) shows how well neighbour pieces match.

        :return: list of (segment, segment) pairs; empty if the puzzle grid
            cannot be determined. ``logger`` and ``log_prefix`` are unused.
        """
        puzzle_vertical_separators, puzzle_horizontal_separators = ImageProcessor._determine_separators(
            image, save_steps_dir = save_steps_dir
        )

        if puzzle_vertical_separators is None or puzzle_horizontal_separators is None:
            return []

        vertical_indentation = 2
        horizontal_indentation = 2
        segment_types = ImageProcessor.JointSegment.Type

        # construct joints
        # h and v are pairs of neighbour separators, each separator is [start, end],
        # so h/v describe one puzzle cell: v[0][1]..v[1][0] by x, h[0][1]..h[1][0] by y.
        res_joints: typing.List[typing.Tuple[ImageProcessor.JointSegment, ImageProcessor.JointSegment]] = []
        for h_index, h in enumerate(puzzle_horizontal_separators):
            for v_index, v in enumerate(puzzle_vertical_separators):
                if h_index < len(puzzle_horizontal_separators) - 1:
                    # add horizontal joint (between this cell and the cell below)
                    j_len = v[1][0] - v[0][1] - 2 * vertical_indentation
                    res_joints.append(
                        (
                            ImageProcessor.JointSegment(
                                start_point=(v[0][1] + horizontal_indentation, h[1][0] - vertical_indentation),
                                length=j_len,
                                type=segment_types.HORIZONTAL
                            ),
                            ImageProcessor.JointSegment(
                                start_point=(v[0][1] + horizontal_indentation, h[1][1] + vertical_indentation),
                                length=j_len,
                                type=segment_types.HORIZONTAL
                            ),
                        )
                    )
                if v_index < len(puzzle_vertical_separators) - 1:
                    # add vertical joint (between this cell and the cell to the right)
                    j_len = h[1][0] - h[0][1] - 2 * horizontal_indentation
                    res_joints.append(
                        (
                            ImageProcessor.JointSegment(
                                start_point=(v[1][0] - horizontal_indentation, h[0][1] + vertical_indentation),
                                length=j_len,
                                type=segment_types.VERTICAL
                            ),
                            ImageProcessor.JointSegment(
                                start_point=(v[1][1] + horizontal_indentation, h[0][1] + vertical_indentation),
                                length=j_len,
                                type=segment_types.VERTICAL
                            ),
                        )
                    )

        # draw joints
        if save_steps_dir:
            debug_image = image.copy()
            for joint_pair in res_joints:
                for joint in joint_pair:
                    cv2.line(
                        debug_image,
                        joint.start_point,
                        ImageProcessor._joint_end_point(joint),
                        (0, 0, 255),
                        1
                    )
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_joints.png'), debug_image)

        return res_joints

    @staticmethod
    def evaluate_joints_diff(
        image,
        joints: typing.List[typing.Tuple[JointSegment, JointSegment]],
        evaluate_points = 10
    ) -> float:
        """
        Return the average color distance across the given joints.

        For every joint pair, ``evaluate_points`` evenly spaced points are
        sampled along both segments and the euclidean distance between the
        colors of corresponding points is computed.

        :return: mean distance over all sampled points, 0 if nothing was sampled.
        """
        color_diff_sum = 0
        color_diff_count = 0
        for joint_from, joint_to in joints:
            for i in range(evaluate_points):
                point1 = ImageProcessor._joint_sample_point(joint_from, i, evaluate_points)
                point2 = ImageProcessor._joint_sample_point(joint_to, i, evaluate_points)
                c1 = image[point1[1], point1[0]]
                c2 = image[point2[1], point2[0]]
                # float(): avoid wrap-around of unsigned channel values.
                color_diff_sum += math.sqrt(
                    (float(c2[0]) - float(c1[0]))**2 +
                    (float(c2[1]) - float(c1[1]))**2 +
                    (float(c2[2]) - float(c1[2]))**2
                )
                color_diff_count += 1
        return color_diff_sum / color_diff_count if color_diff_count > 0 else 0

    @staticmethod
    def _lines_to_intervals(lines, sum_threshold) -> typing.List[typing.List[int]]:
        """
        Group consecutive lines whose value reaches ``sum_threshold``.

        :return: closed intervals ``[first_index, last_index]`` (the right
            line that obeys the threshold is included), in ascending order.
        """
        last_interval_start = None
        intervals = []

        for i, line_sum in enumerate(lines):
            if line_sum >= sum_threshold:
                if last_interval_start is None:
                    last_interval_start = i
            elif last_interval_start is not None:
                # close interval
                intervals.append([last_interval_start, i - 1])
                last_interval_start = None

        if last_interval_start is not None:
            intervals.append([last_interval_start, len(lines) - 1])

        return intervals

    @staticmethod
    def group_values(arr, radius):
        """
        Split values into groups of close values.

        Values are sorted; a value joins the current group while it is closer
        than ``radius`` to the FIRST value of that group, otherwise it starts
        a new group.

        :return: list of groups (lists), empty list for empty input.
        """
        ordered = sorted(arr)
        if not ordered:
            return []
        groups = [[ordered[0]]]
        for value in ordered[1:]:
            if value - groups[-1][0] < radius:
                groups[-1].append(value)
            else:
                groups.append([value])
        return groups

    @staticmethod
    def _get_separators_group(
        separators,
        min_len,
        max_len
    ) -> typing.Optional[
        typing.List[
            typing.Tuple[
                typing.List[int],  # left/up separator [start, end]
                typing.List[int]   # right/down separator [start, end]
            ]
        ]
    ]:
        """
        Select the separators that bound equally sized puzzle blocks.

        Distances between neighbour separators are grouped with tolerance 4;
        the first group whose average distance is in ``[min_len, max_len)``
        and that contains from 3 to 7 blocks is returned.

        :return: list of (separator, next separator) pairs sorted by
            position, or None if no such group exists.
        """
        block_heights = collections.OrderedDict()
        for l_index in range(1, len(separators)):
            prev_bottom = separators[l_index - 1][1]
            cur_top = separators[l_index][0]
            block_height = cur_top - prev_bottom
            block_heights.setdefault(block_height, []).append(
                (separators[l_index - 1], separators[l_index])
            )

        for height_group in ImageProcessor.group_values(block_heights.keys(), 4):
            avg_height = int(sum(height_group) / len(height_group))
            if (avg_height >= int(min_len) and avg_height < int(max_len)):
                # check number of separators
                res_separators = []
                for height in height_group:
                    res_separators += block_heights[height]
                res_separators = sorted(res_separators, key=lambda sep: sep[0])
                if (len(res_separators) >= 3 and len(res_separators) <= 7):
                    # found puzzle groups
                    return res_separators

        return None

    @staticmethod
    def _determine_separators(
        image, white_percent = 0.94,
        lower_color = (210, 210, 210), upper_color = (256, 256, 256),
        save_steps_dir = None
    ):
        """
        Detect the light lines that separate puzzle pieces.

        A row (column) is a separator line when at least ``white_percent`` of
        its pixels are inside ``[lower_color, upper_color]``. Block sizes are
        accepted between 1/20 and 1/2 of the image size.

        :return: ``(vertical_separators, horizontal_separators)`` in the format
            of _get_separators_group, or ``(None, None)`` if either direction
            has no suitable group.
        """
        h, w, _ = image.shape
        mask = cv2.inRange(image, lower_color, upper_color)
        mask = mask / 255  # 0/255 mask -> 0/1, so that sums count pixels

        # determine horizontal separators
        horizontal_lines = np.sum(mask, axis = 1)
        horizontal_separators = ImageProcessor._lines_to_intervals(horizontal_lines, w * white_percent)
        # filter horizontal separators
        puzzle_horizontal_separators = ImageProcessor._get_separators_group(
            horizontal_separators, h / 20, h / 2
        )

        # determine vertical separators
        vertical_lines = np.sum(mask, axis = 0)
        vertical_separators = ImageProcessor._lines_to_intervals(vertical_lines, h * white_percent)
        # filter vertical separators
        puzzle_vertical_separators = ImageProcessor._get_separators_group(
            vertical_separators, w / 20, w / 2
        )

        if puzzle_horizontal_separators is None or puzzle_vertical_separators is None:
            return None, None

        if save_steps_dir:
            debug_image = image.copy()
            for h_el in puzzle_horizontal_separators:
                cv2.line(debug_image, (0, h_el[0][1]), (w, h_el[0][1]), (255, 0, 0), 1)
                cv2.line(debug_image, (0, h_el[1][0]), (w, h_el[1][0]), (255, 0, 0), 1)
            for v_el in puzzle_vertical_separators:
                cv2.line(debug_image, (v_el[0][1], 0), (v_el[0][1], h), (0, 0, 255), 1)
                cv2.line(debug_image, (v_el[1][0], 0), (v_el[1][0], h), (0, 0, 255), 1)
            cv2.imwrite(os.path.join(save_steps_dir, 'image_with_sep.png'), debug_image)

        return puzzle_vertical_separators, puzzle_horizontal_separators
--------------------------------------------------------------------------------
/src/yandex_captcha_puzzle_solver/yandex_captcha_puzzle_solver.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import logging
3 | import os
4 | import typing
5 | import random
6 | import datetime
7 | import asyncio
8 | import certifi
9 | import contextlib
10 |
11 | # Image processing imports
12 | import numpy as np
13 | import cv2
14 |
15 | from .browser_wrapper import BrowserWrapper
16 | from .proxy_controller import ProxyController
17 | from .image_processor import ImageProcessor
18 |
logger = logging.getLogger(__name__)

# CSS selectors that match the yandex smart captcha iframe.
YANDEX_CAPTCHA_SELECTORS = ['iframe[src*="smartcaptcha.yandexcloud.net"]']
USER_AGENT = None

# NOTE(review): presumably seconds - confirm against the places where they are used.
_SHORT_TIMEOUT = 1
_REDIRECT_WAIT_TIMEOUT = 5
28 |
29 |
class Request(object):
    """
    Request for process, can be extended and some custom fields used in process_command.

    Fields may be passed as a dict to the constructor or assigned as attributes.
    """
    url: typing.Optional[str] = None
    yandex_key: typing.Optional[str] = None
    # The REST server of this package assigns a canonical proxy url string here.
    proxy: typing.Optional[str] = None
    max_timeout: float = 60  # timeout in sec
    # List of cookie dicts: {"name": "...", "value": "...", ...}
    cookies: typing.Optional[typing.List[dict]] = None

    def __init__(self, _dict=None):
        """:param _dict: optional mapping of field names to values (custom fields allowed)."""
        if _dict:
            self.__dict__.update(_dict)

    def __str__(self):
        # Only explicitly assigned fields are shown (class-level defaults are not).
        return str(self.__dict__)
48 |
49 |
class Response(object):
    """Result of a solve attempt; every field is None until assigned."""
    url: str = None
    cookies: list = None
    user_agent: str = None
    token: str = None
    message: str = None

    def __str__(self):
        # Only the attributes assigned on the instance are printed.
        return str(vars(self))
59 |
60 |
61 | class Solver(object):
62 | """
63 | Solver
64 | """
65 | _proxy: str = None
66 | _driver: BrowserWrapper = None
67 | _proxy_controller: ProxyController = None
68 | _disable_gpu: bool = False
69 | _screenshot_i: int = 0
70 | _debug_dir: str = None
71 |
class Exception(Exception):
    """
    Solver error that additionally carries the name of the processing step
    it was raised on.

    The class intentionally reuses the name of the builtin it derives from.
    """
    # Processing step the error was raised on (None if not given).
    step = None

    def __init__(self, message: str, step: str = None):
        super().__init__(message)
        self.step = step
78 |
79 | def __init__(
80 | self,
81 | proxy: str = None,
82 | proxy_controller = None,
83 | disable_gpu = False,
84 | debug_dir: str = None
85 | ):
86 | self._proxy = proxy
87 | self._driver = None
88 | self._proxy_controller = proxy_controller
89 | self._debug_dir = debug_dir
90 | self._disable_gpu = disable_gpu
91 |
async def save_screenshot(
    self, step_name, image = None, mark_coords = None, mark_rect = None, mark_joints = None
):
    """
    Dump the current state into the debug directory (no-op without debug dir).

    Files are named ``<counter>_<step_name>``: ``.jpg`` holds ``image`` (or a
    fresh page screenshot when ``image`` is None), ``_mark.jpg`` the same
    picture with the requested marks, ``.html`` the current DOM.

    :param step_name: name of the step, used in the file names.
    :param image: cv2 image to save instead of taking a page screenshot.
    :param mark_coords: points to mark with circles.
    :param mark_rect: 4 values; a rectangle is drawn from
        ``(mark_rect[0], mark_rect[1])`` to ``(mark_rect[2], mark_rect[3])``.
    :param mark_joints: pairs of ImageProcessor.JointSegment to draw as lines.
    """
    if not self._debug_dir:
        return

    screenshot_file_without_ext = os.path.join(
        self._debug_dir, str(self._screenshot_i) + '_' + step_name)

    if image is not None:
        cv2.imwrite(screenshot_file_without_ext + ".jpg", image)
    else:
        await self._driver.save_screenshot(screenshot_file_without_ext + ".jpg")

    if mark_coords or mark_rect or mark_joints:
        # Marks are drawn over the file saved above.
        image = cv2.imread(screenshot_file_without_ext + ".jpg")
        if mark_coords:
            for mark_c in mark_coords:
                image = cv2.circle(image, mark_c, 5, (255, 0, 0), 2)
        if mark_rect:
            image = cv2.rectangle(
                image,
                (mark_rect[0], mark_rect[1]),
                (mark_rect[2], mark_rect[3]),
                (255, 0, 0),
                2  # < thickness
            )
        if mark_joints:
            segment_types = ImageProcessor.JointSegment.Type
            for joint_pair in mark_joints:
                # Both segments of a pair are drawn the same way.
                for joint in joint_pair:
                    end_point = (
                        joint.start_point[0] + (
                            joint.length if joint.type == segment_types.HORIZONTAL else 0
                        ),
                        joint.start_point[1] + (
                            joint.length if joint.type == segment_types.VERTICAL else 0
                        )
                    )
                    image = cv2.line(image, joint.start_point, end_point, (0, 0, 255), 1)
        cv2.imwrite(screenshot_file_without_ext + "_mark.jpg", image)

    dom = await self._driver.get_dom()
    # Explicit encoding: page content may contain characters the platform
    # default encoding cannot represent.
    with open(screenshot_file_without_ext + '.html', 'w', encoding='utf-8') as fp:
        fp.write(dom)
    self._screenshot_i += 1

    logger.debug("Screenshot saved to '" + screenshot_file_without_ext + "'")
155 |
156 | async def solve(self, req: Request) -> Response:
157 | # do some validations
158 | if req.url is None:
159 | raise Exception("Parameter 'url' should be defined.")
160 |
161 | try:
162 | logger.info("Solve request: " + str(req))
163 | res = await asyncio.wait_for(self._resolve_challenge(req), req.max_timeout)
164 | logger.info("Solve result: " + str(res))
165 | except asyncio.TimeoutError:
166 | raise Exception("Processing timeout (max_timeout=" + str(req.max_timeout) + ")")
167 | return res
168 |
async def _resolve_challenge(self, req: Request) -> Response:
    """
    Run one solving session: prepare the proxy, start a browser, solve, clean up.

    A browser is created for the request and is always closed afterwards.
    Every failure is logged and re-raised as ``Solver.Exception`` whose
    message names the platform and the step that failed.

    :param req: request to process; ``proxy`` and ``max_timeout`` are read
      here, the request is then handed to ``_resolve_challenge_impl``.
    :return: response produced by ``_resolve_challenge_impl``.
    :raises Solver.Exception: on any error; ``step`` holds the failed step.
    """
    start_time: datetime.datetime = datetime.datetime.now()
    step = 'start'
    try:
        # Use default upped proxy
        use_proxy: str = self._proxy

        step = 'proxy init'
        if req.proxy:
            # Up proxy with specific end proxy (for yandex requests)
            if not self._proxy_controller:
                raise Solver.Exception(
                    "For use proxy with authorization you should pass proxy_controller into c-tor",
                    step=step
                )
            # NOTE(review): the default proxy, not req.proxy, is handed to the
            # controller, so the value of req.proxy is never used - confirm
            # this is intended.
            proxy_holder = self._proxy_controller.get_proxy(use_proxy)
            use_proxy = "socks5://127.0.0.1:" + str(proxy_holder.local_port())
        else:
            proxy_holder = contextlib.nullcontext()

        with proxy_holder:
            try:
                step = 'browser init'
                self._driver = await BrowserWrapper.create(
                    use_proxy, disable_gpu = self._disable_gpu
                )
                logger.info(
                    'New instance of webdriver has been created to perform the request (proxy=' +
                    str(use_proxy) + '), timeout=' + str(req.max_timeout))
                return await self._resolve_challenge_impl(req, start_time)
            finally:
                logger.info('Close webdriver')
                try:
                    if self._driver is not None:
                        await self._driver.close()
                        logger.debug('A used instance of webdriver has been destroyed')
                        if logger.isEnabledFor(logging.DEBUG):
                            # Read outputs only after driver close (when process stopped),
                            # otherwise output reading can be blocked.
                            outputs = await self._driver.get_outputs()
                            for output_i, output in enumerate(outputs or ()):
                                # errors="replace": broken bytes in the browser output
                                # must not turn a finished request into a failure.
                                logger.debug(
                                    "Webdriver output #" + str(output_i) + ":" +
                                    "\n---------------------------------------\n" +
                                    output.decode("utf-8", errors="replace") +
                                    "\n---------------------------------------\n"
                                )
                finally:
                    # Forget the driver even when closing it failed.
                    self._driver = None
    except Solver.Exception as e:
        error_message = (
            "Error solving the challenge. On platform " + str(sys.platform) +
            " at step '" + str(e.step) + "': " +
            str(e).replace('\n', '\\n')
        )
        logger.error(error_message)
        raise Solver.Exception(error_message, step=e.step) from e
    except Exception as e:
        error_message = (
            "Error solving the challenge. On platform " + str(sys.platform) +
            " at step '" + step + "': " +
            str(e).replace('\n', '\\n')
        )
        logger.error(error_message)
        # Keep the step in the exception, as the Solver.Exception handler does.
        raise Solver.Exception(error_message, step=step) from e
231 |
async def _check_challenge(self):
    """
    Tell whether the page contains a captcha.

    :return: True when at least one selector of ``YANDEX_CAPTCHA_SELECTORS``
      matches an element of the page, False otherwise.
    """
    captcha_found = False
    for captcha_selector in YANDEX_CAPTCHA_SELECTORS:
        matched = await self._driver.select_count(captcha_selector)
        if matched > 0:
            # One match is enough, the remaining selectors are not queried.
            captcha_found = True
            break
    return captcha_found
237 |
238 | async def _wait_screenshot(self, css_selector) -> typing.Tuple[typing.Any, typing.Tuple[int, int, int, int]]:
239 | while True:
240 | captcha_frame_image, rect = await self._driver.get_element_screenshot(
241 | 'div[class="smart-captcha"]'
242 | )
243 | if captcha_frame_image is not None:
244 | return (captcha_frame_image, rect)
245 | await self.save_screenshot('element_screenshot_step')
246 | await asyncio.sleep(1)
247 |
async def _challenge_wait_and_click_loop(self) -> str:
    """
    Work on the captcha until the checkbox form can be submitted, return the token.

    Every iteration looks at the state of the page:

    * checkbox iframe present - either drag the checkbox slider (captcha not
      solved yet) or submit the form and return the text of ``#smart_token``;
    * advanced iframe present - solve the puzzle shown in the modal window.

    The loop ends only with a successful submit; ``solve`` bounds the whole
    processing with a timeout.

    :return: text of the ``#smart_token`` element after the form is submitted.
    """
    attempt = 0
    # The result is not used; the call is kept to leave the interaction
    # with the driver as it was.
    await self._driver.size()

    while True:
        logger.info("Challenge step #" + str(attempt))
        await self.save_screenshot('attempt')

        # Check state of captcha.
        checkbox = await self._driver.select_count('iframe[src*="smartcaptcha.yandexcloud.net/checkbox"]')
        if checkbox > 0:
            finished, token = await self._handle_checkbox_state()
            if finished:
                return token

        advanced = await self._driver.select_count('iframe[src*="smartcaptcha.yandexcloud.net/advanced"]')
        if advanced > 0:
            await self._handle_advanced_state()

        attempt = attempt + 1
        await asyncio.sleep(_SHORT_TIMEOUT)

async def _handle_checkbox_state(self) -> typing.Tuple[bool, typing.Optional[str]]:
    """
    Process the checkbox (slider) form, which is either unsolved or solved.

    :return: ``(True, token)`` when the solved form has been submitted,
      ``(False, None)`` when the slider was dragged or not found.
    """
    captcha_frame_image, captcha_frame_rect = await self._wait_screenshot('div[class="smart-captcha"]')
    image_width = captcha_frame_image.shape[1]

    logger.info("Drag slider")
    await self.save_screenshot('attempt_to_move_slider')
    # scroller in form
    down_and_up_points = Solver._get_drag_points(captcha_frame_image)
    if down_and_up_points is None:
        return (False, None)

    down_point = down_and_up_points[0]
    if not (down_point[0] < image_width / 2):
        # Solved (slider in right part) - send form
        await self._driver.click("#submit_captcha_button")
        await asyncio.sleep(1)  # Wait form sending.
        # get token as text
        return (True, await self._driver.select_text('#smart_token'))

    # Need to solve checkbox
    up_point = down_and_up_points[1]
    logger.info("To move slider from " + str(down_point) + " to " + str(up_point))
    # The points were found inside the frame image; the mouse is moved in
    # the coordinates that the frame rectangle is given in.
    abs_down_point = (captcha_frame_rect.left + down_point[0], captcha_frame_rect.top + down_point[1])
    abs_up_point = (captcha_frame_rect.left + up_point[0], captcha_frame_rect.top + up_point[1])
    press_point = (int(round(abs_down_point[0])), int(round(abs_down_point[1])))
    release_point = (int(round(abs_up_point[0])), int(round(abs_up_point[1])))
    await self.save_screenshot('attempt_move_down_point', mark_coords=[press_point, release_point])

    # Approach the slider from a nearby point and press exactly where the
    # drag path below starts.
    premove_point = (press_point[0] - random.randint(5, 10), press_point[1] - random.randint(5, 10))
    await self._driver.mouse_move(premove_point)
    await self._driver.mouse_move(press_point)
    await self._driver.mouse_down()
    await self.save_screenshot('attempt_move_down', mark_coords=[press_point])

    steps = 20
    for i in range(1, steps + 1):
        x = round(abs_down_point[0] + (abs_up_point[0] - abs_down_point[0]) * (i / steps))
        y = round(abs_down_point[1] + (abs_up_point[1] - abs_down_point[1]) * (i / steps))
        await self._driver.mouse_move((x, y))
    await self._driver.mouse_up()
    await self.save_screenshot('attempt_from_move_slider_after_up')
    await asyncio.sleep(1)  # Wait advanced challenge loading.
    return (False, None)

async def _handle_advanced_state(self) -> None:
    """
    Solve the puzzle of the modal window by dragging its slider.

    The slider is searched for in the lower 2/5 of the modal frame, the
    puzzle joints in the upper 3/5. When no slider is found nothing is
    done and the caller retries on its next iteration.
    """
    # modal mode - find puzzle position
    full_image = await self._driver.get_screenshot()
    # get internal slider position in modal
    logger.info("Solve modal window")
    await self.save_screenshot("modal_solve_start")
    modal_rect = ImageProcessor.get_modal_frame_rect(full_image)
    # modal_rect is used as (x, y, width, height) of the modal inside the screenshot.
    modal_x, modal_y = modal_rect[0], modal_rect[1]
    modal_width, modal_height = modal_rect[2], modal_rect[3]
    modal_image = full_image[modal_y:modal_y + modal_height, modal_x:modal_x + modal_width]

    slider_top = int(modal_height * 3 / 5)
    slider_height = int(modal_height * 2 / 5)
    slider_image = modal_image[slider_top:slider_top + slider_height, 0:modal_width]
    down_and_up_points = ImageProcessor.get_drag_points(slider_image)
    if down_and_up_points is None:
        return

    assert down_and_up_points[0][0] < down_and_up_points[1][0]
    # offset down_and_up_points relative full image
    down_and_up_points = (
        (down_and_up_points[0][0] + modal_x, down_and_up_points[0][1] + modal_y + slider_top),
        (down_and_up_points[1][0] + modal_x, down_and_up_points[1][1] + modal_y + slider_top)
    )
    await self.save_screenshot("modal_slider_pos", mark_coords=down_and_up_points)

    # get joints in modal
    puzzle_height = int(modal_height * 3 / 5)
    find_puzzle_image = modal_image[0:puzzle_height, 0:modal_width]
    logger.debug("To save modal_find_puzzle")
    await self.save_screenshot("modal_find_puzzle", image=find_puzzle_image)
    logger.debug("From save modal_find_puzzle")
    joints = ImageProcessor.get_puzzle_joints(find_puzzle_image)
    # offset joints relative full image (the puzzle area starts at the modal origin)
    for joint1, joint2 in joints:
        joint1.start_point = (joint1.start_point[0] + modal_x, joint1.start_point[1] + modal_y)
        joint2.start_point = (joint2.start_point[0] + modal_x, joint2.start_point[1] + modal_y)

    logger.debug("Modal solving started with " + str(len(joints)) + " joints")
    await self.save_screenshot("modal_start", mark_joints=joints)

    start_point, end_point = down_and_up_points[0], down_and_up_points[1]
    await self._driver.mouse_move(start_point)
    await self._driver.mouse_down()
    select_pos = await self._search_slider_position(start_point, end_point, joints)
    await self._driver.mouse_move(select_pos)
    await self.save_screenshot("modal_final_slider_pos")
    await self._driver.mouse_up()
    await asyncio.sleep(1)
    await self.save_screenshot('modal_final')

async def _search_slider_position(self, start_point, end_point, joints):
    """
    Find the slider position with the smallest joints difference.

    The pressed slider is moved over candidate positions between
    ``start_point`` and ``end_point``; the range is then narrowed to the
    neighbours of the best candidate, until the range is at most 2 pixels
    wide or all candidates of a pass differ by no more than 0.1.

    :return: the best position found, ``start_point`` when nothing was checked.
    """
    slider_steps = 20
    min_diff = 0
    max_diff = 10000000
    depth_step_i = 0
    select_pos = start_point
    while abs(max_diff - min_diff) > 0.1 and abs(end_point[0] - start_point[0]) > 2:
        min_diff = 10000000
        max_diff = 0
        new_start_point = None  # < Positions around min diff point
        new_end_point = None

        check_points: typing.List[typing.Tuple[int, int]]
        if end_point[0] - start_point[0] > slider_steps:
            check_points = [
                Solver._middle_point(start_point, end_point, float(i) / slider_steps)
                for i in range(slider_steps + 1)
            ]
        else:
            check_points = [(x, start_point[1]) for x in range(start_point[0], end_point[0] + 1)]
        if not check_points:
            break  # nothing to try, keep the best position known so far

        last_check_point_i = len(check_points) - 1
        for check_point_i, middle_point in enumerate(check_points):
            await self._driver.mouse_move(middle_point)
            full_image = await self._driver.get_screenshot()
            joints_diff = ImageProcessor.evaluate_joints_diff(full_image, joints)
            await self.save_screenshot(
                "modal_slider_pos_" + str(check_point_i) + "_" + str(joints_diff), mark_joints=joints
            )
            if new_start_point is None or joints_diff < min_diff:
                min_diff = joints_diff
                new_start_point = check_points[check_point_i - 1] if check_point_i > 0 else middle_point
                new_end_point = (
                    check_points[check_point_i + 1] if check_point_i < last_check_point_i else middle_point
                )
                select_pos = middle_point
            max_diff = max(max_diff, joints_diff)

        start_point = new_start_point
        end_point = new_end_point
        logger.debug(
            "Puzzle solve step " + str(depth_step_i) + ", min_diff = " + str(min_diff) +
            ", max_diff = " + str(max_diff) +
            ", start_point = " + str(start_point) +
            ", end_point = " + str(end_point) +
            ", select_pos = " + str(select_pos) +
            ", check_points = " + str(check_points)
        )
        depth_step_i += 1

    return select_pos
421 |
async def _resolve_challenge_impl(self, req: Request, start_time: datetime.datetime) -> Response:
    """
    Open the page, solve the captcha and collect the result.

    The request url is extended with the ``solver_intercept`` and
    ``yandex_captcha_key`` query parameters before it is opened. When the
    request has cookies they are set and the page is opened again.

    :param req: request; ``url``, ``yandex_key`` and ``cookies`` are read.
    :param start_time: accepted but not used by this method.
    :return: response filled with message, token, url, cookies and user agent.
    :raises Solver.Exception: on any error; ``step`` names the failed step.
    """
    global USER_AGENT
    step = 'solving'
    try:
        res = Response()

        step = 'navigate to url'
        # navigate to the page
        # The parameters are inserted in front of a fragment: anything after
        # '#' is not part of the query.
        page_url, fragment_mark, fragment = req.url.partition('#')
        if "?" not in page_url:
            page_url += "?"
        elif page_url[-1] != '?':
            page_url += '&'
        result_url = (
            page_url + 'solver_intercept=1&yandex_captcha_key=' + str(req.yandex_key) +
            fragment_mark + fragment
        )
        logger.debug(f'Navigating to... {result_url}')
        await self._driver.get(result_url)

        logger.debug('To make screenshot')
        await self.save_screenshot('evil_logic')

        step = 'set cookies'

        # set cookies if required
        if req.cookies:
            logger.debug('Setting cookies...')
            await self._driver.set_cookies(req.cookies)
            await self._driver.get(result_url)

        step = 'solve challenge'
        token = await self._challenge_wait_and_click_loop()
        res.message = "Challenge solved!"  # expect exception if challenge isn't solved
        res.token = token

        logger.info("Challenge solving finished")
        await self.save_screenshot('solving_finish')

        step = 'get cookies'
        res.url = await self._driver.current_url()
        res.cookies = await self._driver.get_cookies()
        logger.info("Cookies got")
        # The user agent is requested once and reused through the module level variable.
        if USER_AGENT is None:
            step = 'get user-agent'
            USER_AGENT = await self._driver.get_user_agent()
        res.user_agent = USER_AGENT

        await self.save_screenshot('finish')
        logger.info('Solving finished')

        return res
    except Exception as e:
        raise Solver.Exception(str(e), step=step) from e
476 |
477 | @staticmethod
478 | def _middle_point(start_point: typing.Tuple[int, int], end_point: typing.Tuple[int, int], coef: float):
479 | return (
480 | int(round(start_point[0] + float(end_point[0] - start_point[0]) * coef)),
481 | int(round(start_point[1] + float(end_point[1] - start_point[1]) * coef))
482 | )
483 |
484 | @staticmethod
485 | def _get_dominant_color(image):
486 | a2D = image.reshape(-1, image.shape[-1])
487 | col_range = (256, 256, 256) # generically: a2D.max(0)+1
488 | a1D = np.ravel_multi_index(a2D.T, col_range)
489 | return np.unravel_index(np.bincount(a1D).argmax(), col_range)
490 |
@staticmethod
def _get_drag_points(image, logger = None, save_steps_dir: str = None, log_prefix = ''):
    """
    Find the slider on the image and pick the points to press and to release it at.

    Pixels close to the slider color are selected, the selection is dilated
    with a 10x10 ellipse and eroded with a 20x20 one. The press point is a
    random point of the remaining area; the release point is another random
    point of it with the x coordinate mirrored over the image width.

    :param image: image with 3 color components per pixel in BGR order
      (the order of the slider color constant below).
    :param logger: accepted but not used.
    :param save_steps_dir: when set, the intermediate masks and the image
      with the chosen points are written into this directory.
    :param log_prefix: accepted but not used.
    :return: ``[down_point, up_point]`` as ``(x, y)`` tuples of plain ints,
      or None when no slider colored area is found.
    """
    image_width = image.shape[1]
    slider_color = (255, 130, 82)  # < BGR color of slider.
    slider_color_delta = 50
    lower_bound = tuple(max(component - slider_color_delta, 0) for component in slider_color)
    upper_bound = tuple(min(component + slider_color_delta, 255) for component in slider_color)
    mask = cv2.inRange(image, lower_bound, upper_bound)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'mask.png'), mask)

    broad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    mask = cv2.dilate(mask, broad_kernel, iterations = 1)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'dilated_mask.png'), mask)

    erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
    mask = cv2.erode(mask, erode_kernel, iterations = 1)

    if save_steps_dir:
        cv2.imwrite(os.path.join(save_steps_dir, 'eroded_mask.png'), mask)

    # np.where returns the row (y) indexes first, the column (x) indexes second.
    rows, cols = np.where(mask >= 255)
    if len(rows) == 0:
        return None

    # Plain ints instead of numpy scalars: the points are drawn, serialized
    # and used in arithmetic by the callers.
    down_point_pos = random.randint(0, len(rows) - 1)
    down_point = (int(cols[down_point_pos]), int(rows[down_point_pos]))
    up_point_pos = random.randint(0, len(rows) - 1)
    up_point = (int(image_width - cols[up_point_pos]), int(rows[up_point_pos]))

    if save_steps_dir:
        debug_image = image.copy()
        debug_image = cv2.circle(debug_image, down_point, 5, (0, 0, 255), 2)
        debug_image = cv2.circle(debug_image, up_point, 5, (0, 0, 255), 2)
        cv2.imwrite(os.path.join(save_steps_dir, 'image_with_points.png'), debug_image)

    return [down_point, up_point]
540 |
541 |
# fix ssl certificates for compiled binaries
# https://github.com/pyinstaller/pyinstaller/issues/7229
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
os.environ["SSL_CERT_FILE"] = certifi.where()

if __name__ == '__main__':
    # Manual run: solve the captcha of a test page with default settings.
    sys.stdout.reconfigure(encoding="utf-8")
    # basicConfig and StreamHandler belong to the logging module, not to a Logger instance.
    logging.basicConfig(
        format='%(asctime)s [%(name)s] [%(levelname)s]: %(message)s',
        handlers=[logging.StreamHandler(sys.stdout)],
        level=logging.INFO)

    req = Request()
    req.url = 'https://knopka.ashoo.id'

    solver = Solver()
    # solve() is a coroutine function: it has to be driven by an event loop to do anything.
    res = asyncio.run(solver.solve(req))
560 |
--------------------------------------------------------------------------------