├── src
│   └── chrome_lens_py
│       ├── cli
│       │   ├── __init__.py
│       │   └── main.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── protobuf_builder.py
│       │   ├── request_handler.py
│       │   └── image_processor.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── sharex.py
│       │   ├── general.py
│       │   ├── font_manager.py
│       │   ├── config_manager.py
│       │   └── lens_betterproto.py
│       ├── __init__.py
│       ├── constants.py
│       ├── exceptions.py
│       └── api.py
├── setup.py
├── experiments
│   ├── exp.md
│   ├── reverse.py
│   └── test.py
├── requirements.txt
├── LICENSE
├── setup.cfg
├── .gitignore
├── docs
│   └── sharex.md
├── .github
│   └── workflows
│       └── python-publish.yml
├── README.md
└── README_RU.md
/src/chrome_lens_py/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /experiments/exp.md: -------------------------------------------------------------------------------- 1 | This folder is just for experimentation: testing old and new methods, stupid ideas, etc. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | Pillow 3 | filetype 4 | json5 5 | PySocks 6 | httpx[socks,http2] 7 | numpy 8 | rich 9 | betterproto >= 2.0.0b6 -------------------------------------------------------------------------------- /src/chrome_lens_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import LensAPI 2 | from .exceptions import LensAPIError, LensException, LensImageError, LensProtobufError 3 | 4 | __all__ = [ 5 | "LensAPI", 6 | "LensException", 7 | "LensAPIError", 8 | "LensImageError", 9 | "LensProtobufError", 10 | ] 11 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/sharex.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import platform 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def copy_to_clipboard(text: str) -> bool: 8 | """Copies the provided text to the clipboard.""" 9 | system = platform.system() 10 | try: 11 | import pyperclip # type: ignore 12 | 13 | pyperclip.copy(text) 14 | logger.info("Text copied to clipboard.") 15 | return True 16 | except ImportError: 17 | logger.error( 18 | "Module 'pyperclip' not found. Please install it to use clipboard functionality (pip install 'chrome-lens-py[clipboard]')."
19 | ) 20 | if system == "Linux": 21 | logger.info( 22 | "On Linux, you might also need to install xclip or xsel: sudo apt-get install xclip (or xsel)" 23 | ) 24 | return False 25 | except Exception as e: 26 | logger.error(f"Failed to copy text to clipboard: {e}") 27 | return False 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Bropines 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/chrome_lens_py/constants.py: -------------------------------------------------------------------------------- 1 | # API 2 | LENS_CRUPLOAD_ENDPOINT = "https://lensfrontend-pa.googleapis.com/v1/crupload" 3 | DEFAULT_API_KEY = "AIzaSyDr2UxVnv_U85AbhhY8XSHSIavUW0DC-sY" 4 | # https://github.com/AuroraWright/owocr 5 | 6 | 7 | # headers 8 | DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" 9 | DEFAULT_HEADERS = { 10 | "Content-Type": "application/x-protobuf", 11 | "X-Goog-Api-Key": DEFAULT_API_KEY, 12 | "User-Agent": DEFAULT_USER_AGENT, 13 | "Accept-Encoding": "gzip, deflate, br", 14 | "Accept": "*/*", 15 | } 16 | 17 | # img types 18 | SUPPORTED_MIMES_FOR_PREPARE = [ 19 | "image/jpeg", 20 | "image/png", 21 | "image/webp", 22 | "image/bmp", 23 | "image/gif", 24 | "image/tiff", 25 | ] 26 | DEFAULT_IMAGE_MAX_DIMENSION = 1500 27 | 28 | # region and time zone 29 | DEFAULT_CLIENT_REGION = "US" 30 | DEFAULT_CLIENT_TIME_ZONE = "America/New_York" 31 | DEFAULT_OCR_LANG = "" 32 | 33 | # Fonts 34 | DEFAULT_FONT_SIZE_OVERLAY = 20 35 | DEFAULT_FONT_PATH_WINDOWS = "arial.ttf" 36 | DEFAULT_FONT_PATH_LINUX = "DejaVuSans.ttf" 37 | DEFAULT_FONT_PATH_MACOS = "Arial.ttf" 38 | 39 | # Configuration 40 | APP_NAME_FOR_CONFIG = "chrome-lens-py" 41 | DEFAULT_CONFIG_FILENAME = "config.json" 42 | -------------------------------------------------------------------------------- /src/chrome_lens_py/exceptions.py: -------------------------------------------------------------------------------- 1 | class LensException(Exception): 2 | """Base class for exceptions of this library.""" 3 | 4 | pass 5 | 6 | 7 | class LensAPIError(LensException): 8 | """Exception for errors related to HTTP requests to the Lens API.""" 9 | 10 | def __init__(self, message, status_code=None, 
response_body=None): 11 | super().__init__(message) 12 | self.status_code = status_code 13 | self.response_body = response_body 14 | 15 | def __str__(self): 16 | msg = super().__str__() 17 | if self.status_code: 18 | msg += f" (Status Code: {self.status_code})" 19 | if self.response_body: 20 | response_body_str = str(self.response_body) 21 | if len(response_body_str) > 200: 22 | response_body_str = response_body_str[:200] + "..." 23 | msg += f"\nResponse Body (partial): {response_body_str}" 24 | return msg 25 | 26 | 27 | class LensImageError(LensException): 28 | """Exception for errors related to image processing.""" 29 | 30 | pass 31 | 32 | 33 | class LensProtobufError(LensException): 34 | """Exception for errors related to the creation or parsing of Protobuf messages.""" 35 | 36 | pass 37 | 38 | 39 | class LensFontError(LensException): 40 | """Exception for font-related errors.""" 41 | 42 | pass 43 | 44 | 45 | class LensConfigError(LensException): 46 | """Exception for configuration-related errors.""" 47 | 48 | pass 49 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = chrome_lens_py 3 | version = 3.3.1 4 | author = Bropines 5 | author_email = bropines@gmail.com 6 | description = Python library for Google Lens OCR and Translation using the crupload endpoint. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/bropines/chrome-lens-py 10 | license = MIT License 11 | classifiers = 12 | Programming Language :: Python :: 3 13 | Programming Language :: Python :: 3.8 14 | Programming Language :: Python :: 3.9 15 | Programming Language :: Python :: 3.10 16 | Programming Language :: Python :: 3.11 17 | Programming Language :: Python :: 3.12 18 | Operating System :: OS Independent 19 | Topic :: Scientific/Engineering :: Image Recognition 20 | Topic :: Software Development :: Libraries :: Python Modules 21 | Intended Audience :: Developers 22 | 23 | [options] 24 | package_dir = 25 | = src 26 | packages = find: 27 | python_requires = >=3.8 28 | install_requires = 29 | httpx[socks,http2] >= 0.20 30 | Pillow >= 9.0 31 | betterproto >= 2.0.0b6 32 | filetype >= 1.0 33 | rich >= 10.0 34 | numpy 35 | 36 | [options.packages.find] 37 | where = src 38 | exclude = 39 | tests* 40 | 41 | [options.entry_points] 42 | console_scripts = 43 | lens_scan = chrome_lens_py.cli.main:run 44 | 45 | [options.extras_require] 46 | dev = 47 | black 48 | isort 49 | flake8 50 | mypy 51 | pytest 52 | pytest-asyncio 53 | clipboard = 54 | pyperclip>=1.8 55 | 56 | [isort] 57 | profile = black 58 | 59 | [flake8] 60 | max-line-length = 140 61 | extend-ignore = 62 | E203, 63 | W503, 64 | F401 65 | exclude = 66 | .git, 67 | __pycache__, 68 | build, 69 | dist, 70 | venv, 71 | .venv, 72 | lens_betterproto.py -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/general.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | import filetype # type: ignore 6 | 7 | from ..constants import SUPPORTED_MIMES_FOR_PREPARE 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def is_url(string: str) -> bool: 13 | """Checks whether the string is a valid URL.""" 14 | try: 15 | result = urlparse(string) 16 | return all([result.scheme, result.netloc]) 17 | except (ValueError,
AttributeError): 18 | return False 19 | 20 | 21 | def is_image_file_supported(path_or_url: str) -> bool: 22 | """ 23 | Checks if the string is a URL or a supported image file. 24 | Used in the CLI for quick validation before passing to the API. 25 | """ 26 | if is_url(path_or_url): 27 | logger.debug( 28 | f"'{path_or_url}' is a URL, assuming it's a valid image source for the API." 29 | ) 30 | return True 31 | 32 | if not os.path.isfile(path_or_url): 33 | return False 34 | 35 | try: 36 | kind = filetype.guess(path_or_url) 37 | if kind and kind.mime in SUPPORTED_MIMES_FOR_PREPARE: 38 | return True 39 | 40 | ext = os.path.splitext(path_or_url)[1].lower() 41 | pillow_common_exts = [ 42 | ".png", 43 | ".jpg", 44 | ".jpeg", 45 | ".gif", 46 | ".bmp", 47 | ".webp", 48 | ".tif", 49 | ".tiff", 50 | ] 51 | if ext in pillow_common_exts: 52 | logger.debug( 53 | f"File '{path_or_url}' has a common Pillow extension '{ext}', assuming supported." 54 | ) 55 | return True 56 | 57 | except Exception as e: 58 | logger.warning(f"Could not guess file type for '{path_or_url}': {e}") 59 | return True 60 | 61 | return False 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.pkl 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | older/ 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | response_debug.txt 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | Pipfile.lock 90 | 91 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 92 | __pypackages__/ 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | .dmypy.json 109 | dmypy.json 110 | 111 | # Pyre type checker 112 | .pyre/ 113 | 114 | # Editor / IDE specific 115 | .vscode/ 116 | .idea/ 117 | *.sublime-project 118 | *.sublime-workspace 119 | 120 | # egg-info and related files 121 | *.egg-info/ 122 | src/chrome_lens_py.egg-info/ 123 | 124 | # mylist 125 | tree.txt 126 | test.py 127 | -------------------------------------------------------------------------------- /docs/sharex.md: -------------------------------------------------------------------------------- 1 | ## Custom ShareX OCR with Google Lens 2 | 3 | It's possible to use the `chrome-lens-py` package with ShareX to OCR images using the Google Lens API, providing a significant upgrade over the default OCR in ShareX. Here's how to set it up: 4 | 5 | 0. Get [ShareX](https://getsharex.com/) if you don't have it already. 6 | 1. Install Python 3.10+ from the [Python Official website](https://www.python.org/downloads/) or via [Pyenv-WIN](https://github.com/pyenv-win/pyenv-win). 7 | **IMPORTANT:** During installation, you **must** check the "Add Python to PATH" option, otherwise this will not work. 8 | 9 | 2. Install the `chrome-lens-py` library with clipboard support: 10 | ```bash 11 | pip install "chrome-lens-py[clipboard]" 12 | ``` 13 | 3. Find the path to the installed `lens_scan` executable. Run the following command in PowerShell: 14 | ```powershell 15 | (Get-Command lens_scan).Source 16 | ``` 17 | You will get a path similar to this: 18 | ``` 19 | C:\Users\bropi\.pyenv\pyenv-win\shims\lens_scan.bat 20 | ``` 21 | 22 | Copy this path for the next steps. 23 | 24 | 4. Open the ShareX main window and navigate to `Hotkey settings...`. Create a new hotkey. For the task, select `Screen capture` -> `Capture region (Light)`. 25 | 26 | 5. Now, open the settings for that new hotkey (the gear icon). 27 | - Under the **Tasks** tab, ensure `Capture region (Light)` is selected. 28 | - Go to the **Actions** tab and check the `Override actions` box. 29 | - Click **Add...** and set up a new action with the following details: 30 | 31 | ![Screenshot of ShareX Action settings](https://github.com/user-attachments/assets/38ac5d3c-0119-496a-92ab-02a63dd2152c) 32 | 33 | - **Name:** `Lens OCR` (or any name you prefer) 34 | - **File path:** Paste the path you copied in step 3. For example: 35 | - `C:\Users\bropi\.pyenv\pyenv-win\shims\lens_scan.bat` 36 | - **Arguments:** Enter `"$input" --sharex` 37 | - Uncheck `Hidden window` if you need to troubleshoot later. Otherwise, leaving it checked is fine. 38 | 39 | 6. Save the action. Back in the Hotkey settings, make sure your new `Lens OCR` action is checked in the list. 40 | 41 | 7. You can now close the settings windows. Use your new hotkey to capture a region of your screen. The image will be processed, and the recognized text will be automatically copied to your clipboard. 42 | 43 | ![GIF demonstrating the OCR process](https://lune.dimden.dev/1bf28abae5b0.gif) 44 | 45 | ## Troubleshooting 46 | If it takes a long time to process the image and nothing gets copied to your clipboard, an error might be occurring in the script. To see the error, go back to your `Lens OCR` Action settings (step 5), uncheck the **"Hidden window"** option, and run the hotkey again. A console window will appear showing any error messages. 
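You can also run the same command ShareX would run directly in a terminal and read any error output there. A small sketch; the screenshot path below is just a placeholder:

```powershell
# Run lens_scan manually on any saved image; errors are printed to the console
lens_scan "C:\path\to\screenshot.png" --sharex
```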
47 | 48 | ## Updating 49 | To update the package to the latest version, simply run the following command in your terminal: 50 | ```bash 51 | pip install --upgrade "chrome-lens-py[clipboard]" 52 | ``` 53 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | push: 13 | paths: 14 | - 'src/**' 15 | - 'requirements.txt' 16 | 17 | pull_request: 18 | paths: 19 | - 'src/**' 20 | - 'requirements.txt' 21 | workflow_dispatch: 22 | release: 23 | types: [published] 24 | 25 | 26 | permissions: 27 | contents: read 28 | 29 | jobs: 30 | formatting: 31 | name: Checking formatting 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - name: Set up Python 37 | uses: actions/setup-python@v5.5.0 38 | with: 39 | python-version: '3.11' 40 | cache: 'pip' 41 | 42 | - name: Install dependencies 43 | run: pip install black isort 44 | 45 | - name: Check formatting with Black 46 | run: black --check src/ 47 | 48 | - name: Check imports sorting with isort 49 | run: isort --check-only src/ --profile black 50 | 51 | 52 | lint: 53 | name: Checking linting errors 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | 58 | - name: Set up Python 59 | uses: actions/setup-python@v5.5.0 60 | with: 61 | python-version: '3.11' 62 | cache: 'pip' 63 | 64 | - name: Install dependencies 65 | run: pip install flake8 66 | 67 | - name: Lint with flake8 68 | run: flake8 src/ 69 | 70 | security: 71 | name: Checking security 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | 76 | - uses: actions/setup-python@v5 77 | with: 78 | python-version: '3.11' 79 | cache: 'pip' 80 | 81 | - name: Install dependencies 82 | run: pip install bandit 83 | 84 | - name: Run bandit 85 | run: bandit -r src/ -lll 86 | 87 | build: 88 | name: Build packages 89 | runs-on: ubuntu-latest 90 | needs: [lint, security, formatting] 91 | steps: 92 | - uses: actions/checkout@v4 93 | 94 | - name: Set up Python 95 | uses: actions/setup-python@v5 96 | with: 97 | python-version: '3.8' 98 | cache: 'pip' 99 | 100 | - name: Install build 101 | run: pip install build 102 | 103 | - name: Build a binary wheel and a source tarball 104 | run: python3 -m build 105 | 106 | - name: Store the distribution packages 107 | uses: actions/upload-artifact@v4 108 | with: 109 | name: python-package-distributions 110 | path: dist/ 111 | 112 | publish-to-pypi: 113 | name: Publish Python distribution to PyPI 114 | if: startsWith(github.ref, 'refs/tags/') 115 | needs: 116 | - build 117 | runs-on: ubuntu-latest 118 | environment: 119 | name: pypi 120 | url: https://pypi.org/p/chrome_lens_py 121 | permissions: 122 | id-token: write 123 | steps: 124 | - name: Download all the dists 125 | uses: actions/download-artifact@v4 126 | with: 127 | name: python-package-distributions 128 | path: dist/ 129 | - name: Publish distribution to PyPI 130 | uses:
pypa/gh-action-pypi-publish@release/v1.12 131 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/font_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | from typing import Optional, Union 5 | 6 | from PIL import ImageFont 7 | 8 | from ..constants import ( 9 | DEFAULT_FONT_PATH_LINUX, 10 | DEFAULT_FONT_PATH_MACOS, 11 | DEFAULT_FONT_PATH_WINDOWS, 12 | DEFAULT_FONT_SIZE_OVERLAY, 13 | ) 14 | from ..exceptions import LensFontError 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | FontType = Union[ImageFont.FreeTypeFont, ImageFont.ImageFont] 19 | 20 | 21 | def get_default_system_font_path() -> Optional[str]: 22 | if sys.platform.startswith("win"): 23 | font_path = os.path.join( 24 | os.environ.get("SystemRoot", "C:\\Windows"), 25 | "Fonts", 26 | DEFAULT_FONT_PATH_WINDOWS, 27 | ) 28 | if os.path.exists(font_path): 29 | return font_path 30 | elif sys.platform == "darwin": 31 | potential_paths = [ 32 | f"/System/Library/Fonts/Supplemental/{DEFAULT_FONT_PATH_MACOS}", 33 | f"/Library/Fonts/{DEFAULT_FONT_PATH_MACOS}", 34 | DEFAULT_FONT_PATH_MACOS, 35 | ] 36 | for path in potential_paths: 37 | try: 38 | ImageFont.truetype(path, DEFAULT_FONT_SIZE_OVERLAY) 39 | return path 40 | except IOError: 41 | continue 42 | else: # Linux 43 | try: 44 | import subprocess 45 | 46 | result = subprocess.run( 47 | ["fc-match", "-f", "%{file}", DEFAULT_FONT_PATH_LINUX], 48 | capture_output=True, 49 | text=True, 50 | check=False, 51 | ) 52 | if result.returncode == 0 and result.stdout.strip(): 53 | return result.stdout.strip() 54 | except Exception as e: 55 | logger.debug(f"Could not find font via fc-match: {e}") 56 | return DEFAULT_FONT_PATH_LINUX 57 | 58 | logger.warning( 59 | "Could not automatically determine a default system font path. Please specify via config or --font." 60 | ) 61 | return None 62 | 63 | 64 | def get_font( 65 | font_path_override: Optional[str] = None, font_size_override: Optional[int] = None 66 | ) -> FontType: 67 | font_size = ( 68 | font_size_override 69 | if font_size_override is not None 70 | else DEFAULT_FONT_SIZE_OVERLAY 71 | ) 72 | font_path = font_path_override 73 | 74 | if not font_path: 75 | font_path = get_default_system_font_path() 76 | if font_path: 77 | logger.debug(f"Using system default font: {font_path}") 78 | else: 79 | logger.warning( 80 | "No font path specified and system default not found. Pillow will use its built-in default font." 81 | ) 82 | try: 83 | return ImageFont.load_default() 84 | except Exception as e: 85 | logger.error(f"Error loading Pillow's default font: {e}") 86 | raise LensFontError(f"Error loading Pillow's default font: {e}") 87 | 88 | if not font_path: 89 | logger.error("Font path is not defined. Cannot load font.") 90 | raise LensFontError("The path to the font is not defined.") 91 | 92 | try: 93 | logger.debug(f"Attempting to load font: '{font_path}' with size {font_size}") 94 | return ImageFont.truetype(font_path, font_size) 95 | except IOError: 96 | logger.error( 97 | f"Font file not found or cannot be read: {font_path}. Pillow will try its default."
98 | ) 99 | try: 100 | return ImageFont.load_default() 101 | except Exception as e: 102 | logger.error( 103 | f"Critical: Could not load specified font '{font_path}' nor Pillow's default font: {e}" 104 | ) 105 | raise LensFontError( 106 | f"Failed to load the '{font_path}' font or the default Pillow font: {e}" 107 | ) 108 | except Exception as e: 109 | logger.error(f"Unexpected error loading font '{font_path}': {e}", exc_info=True) 110 | raise LensFontError(f"Unexpected error while loading font '{font_path}': {e}") 111 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/protobuf_builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | from typing import TYPE_CHECKING, Optional, Tuple 4 | 5 | from ..constants import ( 6 | DEFAULT_CLIENT_REGION, 7 | DEFAULT_CLIENT_TIME_ZONE, 8 | DEFAULT_OCR_LANG, 9 | ) 10 | from ..exceptions import LensProtobufError 11 | 12 | if TYPE_CHECKING: 13 | from ..utils.lens_betterproto import ( 14 | AppliedFilter, 15 | AppliedFilters, 16 | AppliedFilterTranslate, 17 | ImageData, 18 | ImageMetadata, 19 | ImagePayload, 20 | LensOverlayClientContext, 21 | LensOverlayClusterInfo, 22 | LensOverlayFilterType, 23 | LensOverlayObjectsRequest, 24 | LensOverlayRequestContext, 25 | LensOverlayRequestId, 26 | LensOverlayRoutingInfo, 27 | LensOverlayServerRequest, 28 | LocaleContext, 29 | Platform, 30 | Surface, 31 | ) 32 | else: 33 | from ..utils.lens_betterproto import ( 34 | AppliedFilter, 35 | AppliedFilters, 36 | AppliedFilterTranslate, 37 | ImageData, 38 | ImageMetadata, 39 | ImagePayload, 40 | LensOverlayClientContext, 41 | LensOverlayClusterInfo, 42 | LensOverlayFilterType, 43 | LensOverlayObjectsRequest, 44 | LensOverlayRequestContext, 45 | LensOverlayRequestId, 46 | LensOverlayRoutingInfo, 47 | LensOverlayServerRequest, 48 | LocaleContext, 49 | Platform, 50 | Surface, 51 | ) 52 | 53 | logger = logging.getLogger(__name__) 54 | 55 | 56 | def create_ocr_translate_request( 57 | image_bytes: bytes, 58 | width: int, 59 | height: int, 60 | ocr_language: str, 61 | target_translation_language: Optional[str] = None, 62 | source_translation_language: Optional[str] = None, 63 | client_region: Optional[str] = None, 64 | client_time_zone: Optional[str] = None, 65 | session_uuid: Optional[int] = None, 66 | sequence_id: int = 1, 67 | image_sequence_id: int = 1, 68 | routing_info: Optional["LensOverlayRoutingInfo"] = None, 69 | ) -> Tuple[bytes, int]: 70 | try: 71 | server_request = LensOverlayServerRequest() 72 | objects_request = LensOverlayObjectsRequest() 73 | request_context = LensOverlayRequestContext() 74 | 75 | uuid_to_use = ( 76 | session_uuid 77 | if session_uuid is not None 78 | else random.randint(0, (1 << 63) - 1) 79 | ) 80 | if session_uuid is None: 81 | logger.debug( 82 | f"ProtobufBuilder: No session_uuid provided, generated new one: {uuid_to_use}" 83 | ) 84 | else: 85 | logger.debug(f"ProtobufBuilder: Using provided session_uuid: {uuid_to_use}") 86 | 87 | request_id_obj = LensOverlayRequestId( 88 | uuid=uuid_to_use, 89 | sequence_id=sequence_id, 90 | image_sequence_id=image_sequence_id, 91 | ) 92 | if routing_info: 93 | request_id_obj.routing_info = routing_info 94 | request_context.request_id = request_id_obj 95 | 96 | effective_client_region = ( 97 | client_region if client_region is not None else DEFAULT_CLIENT_REGION 98 | ) 99 | effective_client_time_zone = ( 100 | client_time_zone 101 | if client_time_zone is not None 102 | else 
DEFAULT_CLIENT_TIME_ZONE 103 | ) 104 | 105 | locale_ctx = LocaleContext( 106 | language=ocr_language, 107 | region=effective_client_region, 108 | time_zone=effective_client_time_zone, 109 | ) 110 | client_ctx = LensOverlayClientContext( 111 | platform=Platform.WEB, surface=Surface.CHROMIUM, locale_context=locale_ctx 112 | ) 113 | 114 | if target_translation_language: 115 | translate_options = AppliedFilterTranslate( 116 | target_language=target_translation_language 117 | ) 118 | if source_translation_language: 119 | translate_options.source_language = source_translation_language 120 | 121 | applied_filter_translate = AppliedFilter( 122 | filter_type=LensOverlayFilterType.TRANSLATE, translate=translate_options 123 | ) 124 | client_ctx.client_filters = AppliedFilters( 125 | filter=[applied_filter_translate] 126 | ) 127 | 128 | request_context.client_context = client_ctx 129 | objects_request.request_context = request_context 130 | 131 | image_payload_obj = ImagePayload(image_bytes=image_bytes) 132 | image_metadata_obj = ImageMetadata(width=width, height=height) 133 | image_data_obj = ImageData( 134 | payload=image_payload_obj, image_metadata=image_metadata_obj 135 | ) 136 | objects_request.image_data = image_data_obj 137 | server_request.objects_request = objects_request 138 | 139 | protobuf_payload_bytes = bytes(server_request) 140 | logger.debug( 141 | "Protobuf request created. UUID: %s, SeqID: %s, ImgSeqID: %s, Size: %d bytes.", 142 | uuid_to_use, 143 | sequence_id, 144 | image_sequence_id, 145 | len(protobuf_payload_bytes), 146 | ) 147 | return protobuf_payload_bytes, uuid_to_use 148 | 149 | except TypeError as te: 150 | logger.error(f"TypeError during Protobuf request creation: {te}", exc_info=True) 151 | raise LensProtobufError( 152 | f"Type error when creating a Protobuf request: {te}" 153 | ) from te 154 | except Exception as e: 155 | logger.error(f"Error creating Protobuf request: {e}", exc_info=True) 156 | raise LensProtobufError(f"Error while creating Protobuf request: {e}") from e 157 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/config_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, Dict, Optional 5 | 6 | from ..constants import APP_NAME_FOR_CONFIG, DEFAULT_CONFIG_FILENAME 7 | from ..exceptions import LensConfigError 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def get_default_config_dir(app_name: str = APP_NAME_FOR_CONFIG) -> str: 13 | """Returns the default configuration directory path for the application.""" 14 | home_dir = os.path.expanduser("~") 15 | # This structure is a common convention 16 | config_dir_base = os.getenv("XDG_CONFIG_HOME", os.path.join(home_dir, ".config")) 17 | return os.path.join(config_dir_base, app_name) 18 | 19 | 20 | def load_config(config_file_path: str) -> Dict[str, Any]: 21 | """ 22 | Loads configuration from a JSON file. 23 | Returns an empty dictionary if the file is not found. 24 | Raises LensConfigError on parsing or I/O errors. 
25 | """ 26 | if os.path.isfile(config_file_path): 27 | try: 28 | with open(config_file_path, "r", encoding="utf-8") as f: 29 | return json.load(f) 30 | except json.JSONDecodeError as e: 31 | raise LensConfigError( 32 | f"Error decoding JSON from config file '{config_file_path}': {e}" 33 | ) 34 | except IOError as e: 35 | raise LensConfigError( 36 | f"I/O error reading config file '{config_file_path}': {e}" 37 | ) 38 | return {} 39 | 40 | 41 | def get_effective_config_value( 42 | cli_arg_value: Optional[Any], config_file_value: Optional[Any], default_value: Any 43 | ) -> Any: 44 | """Determines the effective configuration value. Priority: CLI > Config File > Default.""" 45 | if cli_arg_value is not None: 46 | return cli_arg_value 47 | if config_file_value is not None: 48 | return config_file_value 49 | return default_value 50 | 51 | 52 | def build_app_config( 53 | cli_args: Optional[Dict[str, Any]] = None, config_file_path: Optional[str] = None 54 | ) -> Dict[str, Any]: 55 | """ 56 | Builds the final application config by merging values from CLI args and a config file. 57 | """ 58 | cli = cli_args or {} 59 | loaded_config = load_config(config_file_path) if config_file_path else {} 60 | 61 | if loaded_config: 62 | logging.info("Applying settings from config file:") 63 | for key, value in loaded_config.items(): 64 | if key.lower() not in ["api_key", "proxy"]: 65 | logging.info(f" - {key}: {value}") 66 | 67 | # Priority: CLI > Config File > Default (handled by get_effective_config_value) 68 | # Defaults are defined in constants.py or as literals here. 69 | from ..constants import ( 70 | DEFAULT_API_KEY, 71 | DEFAULT_CLIENT_REGION, 72 | DEFAULT_CLIENT_TIME_ZONE, 73 | ) 74 | 75 | final_config = { 76 | "api_key": get_effective_config_value( 77 | cli.get("api_key"), loaded_config.get("api_key"), DEFAULT_API_KEY 78 | ), 79 | "client_region": get_effective_config_value( 80 | cli.get("client_region"), 81 | loaded_config.get("client_region"), 82 | DEFAULT_CLIENT_REGION, 83 | ), 84 | "client_time_zone": get_effective_config_value( 85 | cli.get("client_time_zone"), 86 | loaded_config.get("client_time_zone"), 87 | DEFAULT_CLIENT_TIME_ZONE, 88 | ), 89 | "proxy": get_effective_config_value( 90 | cli.get("proxy"), loaded_config.get("proxy"), None 91 | ), 92 | "timeout": int( 93 | get_effective_config_value( 94 | cli.get("timeout"), loaded_config.get("timeout"), 60 95 | ) 96 | ), 97 | "font_path": get_effective_config_value( 98 | cli.get("font_path"), loaded_config.get("font_path"), None 99 | ), 100 | "font_size": ( 101 | int( 102 | get_effective_config_value( 103 | cli.get("font_size"), loaded_config.get("font_size"), 20 104 | ) 105 | ) 106 | if get_effective_config_value( 107 | cli.get("font_size"), loaded_config.get("font_size"), None 108 | ) 109 | is not None 110 | else None 111 | ), 112 | "logging_level": get_effective_config_value( 113 | cli.get("logging_level"), loaded_config.get("logging_level"), "WARNING" 114 | ).upper(), 115 | "ocr_preserve_line_breaks": get_effective_config_value( 116 | cli.get("ocr_preserve_line_breaks"), 117 | loaded_config.get("ocr_preserve_line_breaks"), 118 | True, 119 | ), 120 | } 121 | return final_config 122 | 123 | 124 | def update_config_file_from_cli(cli_args: Dict[str, Any], config_file_path: str): 125 | """Updates the config file with values from CLI args (only safe fields).""" 126 | current_config = load_config(config_file_path) 127 | 128 | fields_to_update = [ 129 | "client_region", 130 | "client_time_zone", 131 | "proxy", 132 | "timeout", 133 | "font_path", 134 | 
"font_size", 135 | "logging_level", 136 | "ocr_preserve_line_breaks", 137 | ] 138 | updated = False 139 | for field in fields_to_update: 140 | cli_value = cli_args.get(field) 141 | if cli_value is not None and current_config.get(field) != cli_value: 142 | current_config[field] = cli_value 143 | updated = True 144 | 145 | if not updated: 146 | logging.info("No configuration changes to save from CLI arguments.") 147 | return 148 | 149 | config_dir = os.path.dirname(config_file_path) 150 | try: 151 | if not os.path.exists(config_dir): 152 | os.makedirs(config_dir) 153 | with open(config_file_path, "w", encoding="utf-8") as f: 154 | json.dump(current_config, f, indent=4, ensure_ascii=False) 155 | logging.info(f"Configuration file updated: {config_file_path}") 156 | except (IOError, TypeError) as e: 157 | raise LensConfigError(f"Error saving config file '{config_file_path}': {e}") 158 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/request_handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union 3 | 4 | import httpx 5 | 6 | from ..constants import DEFAULT_HEADERS, LENS_CRUPLOAD_ENDPOINT 7 | from ..exceptions import LensAPIError, LensProtobufError 8 | 9 | if TYPE_CHECKING: 10 | from ..utils.lens_betterproto import ( 11 | LensOverlayClusterInfo, 12 | LensOverlayRoutingInfo, 13 | LensOverlayServerResponse, 14 | ) 15 | else: 16 | from ..utils.lens_betterproto import ( 17 | LensOverlayClusterInfo, 18 | LensOverlayRoutingInfo, 19 | LensOverlayServerResponse, 20 | ) 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class LensRequestHandler: 26 | def __init__( 27 | self, 28 | api_key: str, 29 | proxy: Optional[Union[str, Dict[str, httpx.AsyncBaseTransport]]] = None, 30 | timeout: int = 60, 31 | ): 32 | self.api_key = api_key 33 | self.proxy_settings: Dict[str, Any] = {} 34 | self.timeout = timeout 35 | 36 | if proxy: 37 | if isinstance(proxy, str): 38 | self.proxy_settings["proxy"] = proxy 39 | logger.info(f"Using single proxy URL: {proxy}") 40 | elif isinstance(proxy, dict): 41 | self.proxy_settings["mounts"] = proxy 42 | logger.info(f"Using proxy mounts configuration: {proxy}") 43 | else: 44 | logger.warning( 45 | f"Invalid proxy type: {type(proxy)}. Proxy will not be used." 
46 | ) 47 | 48 | self.current_session_uuid: Optional[int] = None 49 | self.current_sequence_id: int = 0 50 | self.current_image_sequence_id: int = 0 51 | self.last_cluster_info: Optional["LensOverlayClusterInfo"] = None 52 | 53 | def _get_headers(self) -> dict: 54 | headers = DEFAULT_HEADERS.copy() 55 | headers["X-Goog-Api-Key"] = self.api_key 56 | return headers 57 | 58 | def start_new_session(self): 59 | self.current_session_uuid = None 60 | self.current_sequence_id = 0 61 | self.current_image_sequence_id = 0 62 | self.last_cluster_info = None 63 | logger.info("LensRequestHandler: New session initiated (state reset).") 64 | 65 | def get_next_sequence_ids_for_request( 66 | self, is_new_image_payload: bool 67 | ) -> Tuple[Optional[int], int, int]: 68 | self.current_sequence_id += 1 69 | if is_new_image_payload: 70 | self.current_image_sequence_id += 1 71 | 72 | logger.debug( 73 | f"RequestHandler: Providing IDs for request: " 74 | f"SessionUUID (current): {self.current_session_uuid}, " 75 | f"Next SeqID: {self.current_sequence_id}, " 76 | f"Next ImgSeqID: {self.current_image_sequence_id} (is_new_image: {is_new_image_payload})" 77 | ) 78 | return ( 79 | self.current_session_uuid, 80 | self.current_sequence_id, 81 | self.current_image_sequence_id, 82 | ) 83 | 84 | async def send_request( 85 | self, protobuf_payload: bytes, request_uuid_used: int 86 | ) -> "LensOverlayServerResponse": 87 | headers = self._get_headers() 88 | 89 | if self.current_session_uuid is None: 90 | self.current_session_uuid = request_uuid_used 91 | logger.info( 92 | f"RequestHandler: Session UUID initialized by this request: {self.current_session_uuid}" 93 | ) 94 | 95 | logger.info( 96 | "Sending request to %s (UUID: %s, SeqID: %s) with payload size: %d bytes.", 97 | LENS_CRUPLOAD_ENDPOINT, 98 | self.current_session_uuid, 99 | self.current_sequence_id, 100 | len(protobuf_payload), 101 | ) 102 | 103 | response_bytes = b"" 104 | async with httpx.AsyncClient(**self.proxy_settings, http2=True) as client: 105 | try: 106 | response = await client.post( 107 | LENS_CRUPLOAD_ENDPOINT, 108 | content=protobuf_payload, 109 | headers=headers, 110 | timeout=self.timeout, 111 | ) 112 | logger.debug(f"Response status: {response.status_code}") 113 | response_bytes = await response.aread() 114 | response.raise_for_status() 115 | 116 | logger.debug(f"Response content length: {len(response_bytes)} bytes.") 117 | 118 | server_response_proto = LensOverlayServerResponse().parse( 119 | response_bytes 120 | ) 121 | 122 | if ( 123 | server_response_proto.error 124 | and server_response_proto.error.error_type != 0 125 | ): 126 | error_msg = f"Lens API server error. Type: {server_response_proto.error.error_type}" 127 | logger.error(error_msg) 128 | raise LensAPIError( 129 | error_msg, 130 | status_code=response.status_code, 131 | response_body=response_bytes.decode(errors="replace"), 132 | ) 133 | 134 | if ( 135 | server_response_proto.objects_response 136 | and server_response_proto.objects_response.cluster_info 137 | ): 138 | self.last_cluster_info = ( 139 | server_response_proto.objects_response.cluster_info 140 | ) 141 | if ( 142 | self.last_cluster_info 143 | and self.last_cluster_info.server_session_id 144 | ): 145 | logger.debug( 146 | f"RequestHandler: Updated last_cluster_info. 
ServerSessionID: {self.last_cluster_info.server_session_id}, " 147 | f"RoutingInfo available: {bool(self.last_cluster_info.routing_info)}" 148 | ) 149 | else: 150 | self.last_cluster_info = None 151 | logger.debug( 152 | "RequestHandler: No cluster_info in response or no objects_response." 153 | ) 154 | 155 | return server_response_proto 156 | 157 | except httpx.HTTPStatusError as e_http: 158 | response_text_content = e_http.response.text 159 | logger.error( 160 | f"HTTP error: {e_http.response.status_code} - {response_text_content[:500]}", 161 | exc_info=True, 162 | ) 163 | raise LensAPIError( 164 | f"HTTP error: {e_http.response.status_code}", 165 | status_code=e_http.response.status_code, 166 | response_body=response_text_content, 167 | ) from e_http 168 | except httpx.RequestError as e_req: 169 | logger.error( 170 | f"Request error (possibly proxy-related): {e_req}", exc_info=True 171 | ) 172 | raise LensAPIError( 173 | f"Network or request error (possibly proxy-related): {e_req}" 174 | ) from e_req 175 | except (LensProtobufError, ValueError) as e_parse: 176 | logger.error( 177 | f"Error parsing Protobuf response: {e_parse}", exc_info=True 178 | ) 179 | try: 180 | decoded_for_error = response_bytes.decode(errors="replace") 181 | except AttributeError: 182 | decoded_for_error = str(response_bytes) 183 | raise LensProtobufError( 184 | f"Protobuf response parsing error: {e_parse}. " 185 | f"Response body (partial): {decoded_for_error[:200]}" 186 | ) from e_parse 187 | except Exception as e_gen: 188 | logger.error(f"Unexpected error during request: {e_gen}", exc_info=True) 189 | raise LensAPIError( 190 | f"Unexpected error while executing the request: {e_gen}" 191 | ) from e_gen 192 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/image_processor.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | import math 4 | from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple 5 | 6 | import httpx 7 | import numpy as np 8 | from PIL import Image, ImageDraw, ImageFile, ImageFont 9 | 10 | from ..constants import DEFAULT_IMAGE_MAX_DIMENSION 11 | from ..exceptions import LensImageError 12 | from ..utils.font_manager import FontType 13 | from ..utils.general import is_url 14 | 15 | if TYPE_CHECKING: 16 | from ..utils.lens_betterproto import CenterRotatedBox, CoordinateType 17 | else: 18 | from ..utils.lens_betterproto import CenterRotatedBox, CoordinateType 19 | 20 | ImageFile.LOAD_TRUNCATED_IMAGES = True 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | async def _get_pil_from_source(image_source: Any) -> Image.Image: 25 | """ 26 | Takes any supported source and returns a PIL.Image object. 27 | Raises LensImageError if the source is unsupported or an error occurs.
28 | """ 29 | if isinstance(image_source, Image.Image): 30 | logger.debug("Processing PIL.Image object source.") 31 | return image_source.copy() 32 | 33 | if isinstance(image_source, str): 34 | if is_url(image_source): 35 | logger.debug(f"Processing URL source: {image_source}") 36 | try: 37 | async with httpx.AsyncClient(timeout=30) as client: 38 | response = await client.get(image_source, follow_redirects=True) 39 | response.raise_for_status() 40 | return Image.open(io.BytesIO(response.content)) 41 | except httpx.RequestError as e: 42 | raise LensImageError( 43 | f"Network error downloading URL '{image_source}': {e}" 44 | ) from e 45 | except Exception as e: 46 | raise LensImageError( 47 | f"Error processing URL '{image_source}': {e}" 48 | ) from e 49 | else: # It's a file path 50 | logger.debug(f"Processing file path source: {image_source}") 51 | try: 52 | return Image.open(image_source) 53 | except FileNotFoundError: 54 | raise LensImageError(f"File not found at path: {image_source}") 55 | except Exception as e: 56 | raise LensImageError( 57 | f"Error opening file path '{image_source}': {e}" 58 | ) from e 59 | 60 | if isinstance(image_source, np.ndarray): 61 | logger.debug("Processing NumPy array source.") 62 | try: 63 | return Image.fromarray(image_source) 64 | except Exception as e: 65 | raise LensImageError(f"Error converting NumPy array to image: {e}") from e 66 | 67 | if isinstance(image_source, bytes): 68 | logger.debug("Processing bytes source.") 69 | try: 70 | return Image.open(io.BytesIO(image_source)) 71 | except Exception as e: 72 | raise LensImageError(f"Error opening image from bytes: {e}") from e 73 | 74 | raise LensImageError(f"Unsupported image source type: {type(image_source)}") 75 | 76 | 77 | def _resize_and_serialize_pil_image(pil_image: Image.Image) -> Tuple[bytes, int, int]: 78 | """Resizes (if necessary) and serializes a PIL.Image to PNG bytes.""" 79 | if pil_image.mode != "RGBA": 80 | pil_image = pil_image.convert("RGBA") 81 | 82 | if ( 83 | pil_image.width > DEFAULT_IMAGE_MAX_DIMENSION 84 | or pil_image.height > DEFAULT_IMAGE_MAX_DIMENSION 85 | ): 86 | pil_image.thumbnail( 87 | (DEFAULT_IMAGE_MAX_DIMENSION, DEFAULT_IMAGE_MAX_DIMENSION), 88 | Image.Resampling.LANCZOS, 89 | ) 90 | 91 | img_byte_arr = io.BytesIO() 92 | pil_image.save(img_byte_arr, format="PNG") 93 | 94 | return img_byte_arr.getvalue(), pil_image.width, pil_image.height 95 | 96 | 97 | async def prepare_image_for_api( 98 | image_source: Any, 99 | ) -> Tuple[bytes, int, int, Image.Image]: 100 | """ 101 | Main preparation function. Takes any source, processes it, and returns API-ready data and the original image. 
102 | """ 103 | try: 104 | pil_image = await _get_pil_from_source(image_source) 105 | original_pil_image = pil_image.copy() 106 | img_bytes, width, height = _resize_and_serialize_pil_image(pil_image) 107 | return img_bytes, width, height, original_pil_image 108 | except LensImageError as e: 109 | raise e 110 | except Exception as e: 111 | raise LensImageError( 112 | f"An unexpected error occurred during image preparation: {e}" 113 | ) from e 114 | 115 | 116 | def get_word_geometry_data(box: "CenterRotatedBox") -> Optional[Dict[str, Any]]: 117 | """Extracts detailed, user-friendly geometry data from a CenterRotatedBox object.""" 118 | if not (hasattr(box, "center_x") and hasattr(box, "center_y")): 119 | return None 120 | 121 | angle_rad = getattr(box, "rotation_z", 0.0) 122 | angle_deg = math.degrees(angle_rad) 123 | 124 | coord_type_enum = getattr(box, "coordinate_type", 0) 125 | coord_type_str = "NORMALIZED" if coord_type_enum == 1 else "IMAGE" 126 | 127 | return { 128 | "center_x": box.center_x, 129 | "center_y": box.center_y, 130 | "width": getattr(box, "width", 0.0), 131 | "height": getattr(box, "height", 0.0), 132 | "angle_deg": angle_deg, 133 | "coordinate_type": coord_type_str, 134 | } 135 | 136 | 137 | def draw_overlay_on_image( 138 | original_image: Image.Image, 139 | ocr_boxes_norm: list[Tuple[float, float, float, float]], 140 | translated_text: Optional[str], 141 | font: FontType, 142 | fill_color: str = "white", 143 | text_color: str = "black", 144 | ) -> Image.Image: 145 | """Draws an overlay on the image: fills OCR areas and writes translated text.""" 146 | img_draw = original_image.copy() 147 | if img_draw.mode != "RGBA": 148 | img_draw = img_draw.convert("RGBA") 149 | draw = ImageDraw.Draw(img_draw) 150 | img_width, img_height = img_draw.size 151 | 152 | if not ocr_boxes_norm: 153 | return img_draw 154 | 155 | for norm_x1, norm_y1, norm_x2, norm_y2 in ocr_boxes_norm: 156 | draw.rectangle( 157 | ( 158 | int(norm_x1 * img_width), 159 | int(norm_y1 * img_height), 160 | int(norm_x2 * img_width), 161 | int(norm_y2 * img_height), 162 | ), 163 | fill=fill_color, 164 | ) 165 | 166 | if not translated_text: 167 | return img_draw 168 | 169 | overall_ocr_min_x = min(b[0] for b in ocr_boxes_norm) 170 | overall_ocr_min_y = min(b[1] for b in ocr_boxes_norm) 171 | overall_ocr_max_x = max(b[2] for b in ocr_boxes_norm) 172 | overall_ocr_max_y = max(b[3] for b in ocr_boxes_norm) 173 | 174 | px_overall_x1 = int(overall_ocr_min_x * img_width) 175 | px_overall_y1 = int(overall_ocr_min_y * img_height) 176 | px_overall_x2 = int(overall_ocr_max_x * img_width) 177 | px_overall_y2 = int(overall_ocr_max_y * img_height) 178 | 179 | overlay_width_px = px_overall_x2 - px_overall_x1 180 | if overlay_width_px <= 0: 181 | return img_draw 182 | 183 | padding = 4 184 | available_width_for_text = overlay_width_px - 2 * padding 185 | if available_width_for_text <= 0: 186 | return img_draw 187 | 188 | lines_to_draw = [] 189 | current_line = "" 190 | for word in translated_text.split(): 191 | test_line = f"{current_line} {word}".strip() 192 | try: 193 | line_width = draw.textlength(test_line, font=font) 194 | except AttributeError: 195 | bbox = draw.textbbox((0, 0), test_line, font=font) 196 | line_width = bbox[2] - bbox[0] 197 | 198 | if line_width <= available_width_for_text: 199 | current_line = test_line 200 | else: 201 | if current_line: 202 | lines_to_draw.append(current_line) 203 | current_line = word 204 | if current_line: 205 | lines_to_draw.append(current_line) 206 | 207 | current_y = px_overall_y1 + 
padding 208 | line_spacing = 2 209 | for line_str in lines_to_draw: 210 | try: 211 | bbox = draw.textbbox((0, 0), line_str, font=font) 212 | line_height = bbox[3] - bbox[1] 213 | line_width = bbox[2] - bbox[0] 214 | 215 | if current_y + line_height > px_overall_y2 - padding: 216 | break 217 | 218 | pos_x = px_overall_x1 + (overlay_width_px - line_width) / 2 219 | draw.text( 220 | (pos_x, current_y), 221 | line_str, 222 | fill=text_color, 223 | font=font, 224 | ) 225 | current_y += line_height + line_spacing 226 | except Exception as e: 227 | logger.warning(f"Could not draw line '{line_str}': {e}") 228 | if hasattr(font, "size"): 229 | line_height = font.size # type: ignore [attr-defined] 230 | else: 231 | line_height = 12 232 | current_y += line_height + line_spacing 233 | continue 234 | 235 | return img_draw 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chrome Lens API for Python 2 | 3 | **English** | [Русский](/README_RU.md) 4 | 5 | [![PyPI version](https://badge.fury.io/py/chrome-lens-py.svg)](https://badge.fury.io/py/chrome-lens-py) 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | [![Python versions](https://img.shields.io/pypi/pyversions/chrome-lens-py.svg)](https://pypi.org/project/chrome-lens-py) 8 | [![Downloads](https://static.pepy.tech/badge/chrome-lens-py)](https://pepy.tech/project/chrome-lens-py) 9 | 10 | > [!IMPORTANT] 11 | > **Major Rewrite (Version 3.1.0+)** 12 | > This library has been completely rewritten from the ground up. It now uses a modern asynchronous architecture (`async`/`await`) and communicates directly with Google's Protobuf endpoint for significantly improved reliability and performance. 13 | > 14 | > **Please update your projects accordingly. All API calls are now `async`.** 15 | > 16 | 17 | > [!WARNING] 18 | > Since the library has been completely rewritten, I may have missed or under-documented something. If you notice an error, please let me know in Issues. 19 | 20 | This project provides a powerful, asynchronous Python library and command-line tool for interacting with Google Lens. It allows you to perform advanced Optical Character Recognition (OCR), get segmented text blocks (e.g., for comics), translate text, and get precise word coordinates. 21 | 22 | ## ✨ Key Features 23 | 24 | - **Modern Backend**: Utilizes Google's official Protobuf endpoint (`v1/crupload`) for robust and accurate results. 25 | - **Asynchronous & Safe**: Built with `asyncio` and `httpx`. Includes a built-in semaphore to prevent API abuse and IP bans from excessive concurrent requests. 26 | - **Powerful OCR & Segmentation**: 27 | - Extract text from images as a single string. 28 | - Get text segmented into logical blocks (paragraphs, dialog bubbles) with their own coordinates. 29 | - Get individual text lines with their own precise geometry. 30 | - **Built-in Translation**: Instantly translate recognized text into any supported language. 31 | - **Versatile Image Sources**: Process images from a **file path**, **URL**, **bytes**, **PIL Image** object, or **NumPy array**. 32 | - **Text Overlay**: Automatically generate and save images with the translated text rendered over them (works poorly for now, alas; no time to do better). 33 | - **Feature-Rich CLI**: A simple yet powerful command-line interface (`lens_scan`) for quick use.
34 | - **Proxy Support**: Full support for HTTP, HTTPS, and SOCKS proxies. 35 | - **Clipboard Integration**: Instantly copy OCR or translation results to your clipboard with the `--sharex` flag. 36 | - **Flexible Configuration**: Manage settings via a `config.json` file, CLI arguments, or environment variables. 37 | 38 | ## 🚀 Installation 39 | 40 | You can install the package using `pip`: 41 | 42 | ```bash 43 | pip install chrome-lens-py 44 | ``` 45 | 46 | To enable clipboard functionality (the `--sharex` flag), install the library with the `[clipboard]` extra: 47 | 48 | ```bash 49 | pip install "chrome-lens-py[clipboard]" 50 | ``` 51 | 52 | Or, install the latest version directly from GitHub: 53 | ```bash 54 | pip install git+https://github.com/bropines/chrome-lens-py.git 55 | ``` 56 | 57 | ## 🚀 Usage 58 | 59 |
60 | ### 🛠️ CLI Usage (`lens_scan`) 61 | 62 | The command-line tool provides quick access to the library's features directly from your terminal. 63 | 64 | ```bash 65 | lens_scan <image_source> [ocr_lang] [options] 66 | ``` 67 | 68 | - **`<image_source>`**: Path to a local image file or an image URL. 69 | - **`[ocr_lang]`** (optional): BCP 47 language code for OCR (e.g., 'en', 'ja'). If omitted, the API will attempt to auto-detect the language. 70 | 71 | #### **Options** 72 | 73 | | Flag | Alias | Description | 74 | | :--- | :--- | :--- | 75 | | `--translate <lang>` | `-t` | **Translate** the OCR text to the target language code (e.g., `en`, `ru`). | 76 | | `--translate-from <lang>` | | Specify the source language for translation (otherwise auto-detected). | 77 | | `--translate-out <path>` | `-to` | **Save** the image with the translated text overlaid to the specified file path. | 78 | | `--output-blocks` | `-b` | **Output OCR text as segmented blocks** (useful for comics). Incompatible with `--get-coords` and `--output-lines`. | 79 | | `--output-lines` | `-ol` | **Output OCR text as individual lines** with their geometry. Incompatible with `--output-blocks` and `--get-coords`. | 80 | | `--get-coords` | | Output recognized words and their coordinates in JSON format. Incompatible with `--output-blocks` and `--output-lines`. | 81 | | `--sharex` | `-sx` | **Copy** the result (translation or OCR) to the clipboard. | 82 | | `--ocr-single-line` | | Join all recognized OCR text into a single line, removing line breaks. | 83 | | `--config-file <path>` | | Path to a custom JSON configuration file. | 84 | | `--update-config` | | Update the default config file with settings from the current command. | 85 | | `--font <path>` | | Path to a `.ttf` font file for the text overlay. | 86 | | `--font-size <size>` | | Font size for the text overlay (default: 20). | 87 | | `--proxy <url>` | | Proxy server URL (e.g., `socks5://127.0.0.1:9050`). | 88 | | `--logging-level <level>` | `-l` | Set logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`). | 89 | | `--help` | `-h` | Show this help message and exit. | 90 | 91 | #### **Examples** 92 | 93 | **1. Basic OCR and Translation** 94 | 95 | Auto-detects the source language on the image and translates it to English. This is the most common use case. 96 | ```bash 97 | lens_scan "path/to/your/image.png" -t en 98 | ``` 99 | 100 | --- 101 | 102 | **2. Get Segmented Text Blocks (for Comics/Manga)** 103 | 104 | Ideal for images with multiple, separate text boxes. This command outputs each recognized text block individually, making it perfect for translating comics or complex documents. 105 | ```bash 106 | lens_scan "path/to/manga.jpg" ja -b 107 | ``` 108 | - `-b` is the alias for `--output-blocks`. 109 | 110 | --- 111 | 112 | **3. Get Individual Text Lines** 113 | 114 | Outputs each recognized line of text along with its geometry. 115 | ```bash 116 | lens_scan "path/to/document.png" --output-lines 117 | ``` 118 | - `-ol` is the alias for `--output-lines`. 119 | 120 | --- 121 | 122 | **4. Get Coordinates of All Individual Words** 123 | 124 | Outputs a detailed JSON array containing every single recognized word and its precise geometric data (center, size, angle). Useful for programmatic analysis or custom overlays. 125 | ```bash 126 | lens_scan "path/to/diagram.png" --get-coords 127 | ``` 128 | 129 | --- 130 | 131 | **5. Translate, Save Overlay, and Copy to Clipboard** 132 | 133 | A power-user workflow. This command will: 134 | 1. OCR a Japanese image. 135 | 2. Translate it to Russian. 136 | 3.
Save a new image named `translated_manga.png` with the Russian text rendered on it. 137 | 4. Copy the final translation to your clipboard. 138 | ```bash 139 | lens_scan "path/to/manga.jpg" ja -t ru -to "translated_manga.png" -sx 140 | ``` 141 | 142 | --- 143 | 144 | **6. Process an Image from a URL as a Single Line** 145 | 146 | Fetches an image directly from a URL and joins all recognized text into one continuous line, removing any line breaks. 147 | ```bash 148 | lens_scan "https://i.imgur.com/VPd1y6b.png" en --ocr-single-line 149 | ``` 150 | 151 | --- 152 | 153 | **7. Use a SOCKS5 Proxy** 154 | 155 | All requests to the Google API will be routed through the specified proxy server, which is useful for privacy or bypassing region restrictions. 156 | ```bash 157 | lens_scan "image.png" --proxy "socks5://127.0.0.1:9050" 158 | ``` 159 | 160 |
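---

**8. Store Defaults in a Configuration File**

Settings you pass on every call can instead live in a JSON config file. A minimal sketch, assuming the default lookup location used by the library's config loader (`$XDG_CONFIG_HOME` or `~/.config`, plus `chrome-lens-py/config.json`); the keys shown are the ones the config builder reads, and all values here are illustrative:

```json
{
    "client_region": "US",
    "client_time_zone": "America/New_York",
    "proxy": "socks5://127.0.0.1:9050",
    "timeout": 60,
    "font_size": 20,
    "logging_level": "WARNING",
    "ocr_preserve_line_breaks": true
}
```

CLI arguments take priority over the config file, which in turn takes priority over built-in defaults; `--update-config` writes the current CLI settings back into this file.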
161 | 162 |
163 | 👨‍💻 Programmatic API Usage (`LensAPI`) 164 | 165 | > [!IMPORTANT] 166 | > The `LensAPI` is fully **asynchronous**. All data retrieval methods must be called with `await` from within an `async` function. 167 | 168 | #### **Basic Example (Full Text)** 169 | 170 | ```python 171 | import asyncio 172 | from chrome_lens_py import LensAPI 173 | 174 | async def main(): 175 | # Initialize the API. You can pass a proxy, region, etc. here. 176 | # By default, an API key is not required. 177 | api = LensAPI() 178 | 179 | image_source = "path/to/your/image.png" # Or a URL, PIL Image, NumPy array 180 | 181 | try: 182 | # Process the image and get a single string of text 183 | result = await api.process_image( 184 | image_path=image_source, 185 | ocr_language="ja", 186 | target_translation_language="en" 187 | ) 188 | 189 | print("--- OCR Text ---") 190 | print(result.get("ocr_text")) 191 | 192 | print("\n--- Translated Text ---") 193 | print(result.get("translated_text")) 194 | 195 | except Exception as e: 196 | print(f"An error occurred: {e}") 197 | 198 | if __name__ == "__main__": 199 | asyncio.run(main()) 200 | ``` 201 | 202 | #### **Working with Different Image Sources** 203 | 204 | The `process_image` method seamlessly handles various input types. 205 | 206 | ```python 207 | from PIL import Image 208 | import numpy as np 209 | 210 | # ... inside an async function ... 211 | 212 | # From a URL 213 | result_url = await api.process_image("https://i.imgur.com/VPd1y6b.png") 214 | 215 | # From a PIL Image object 216 | with Image.open("path/to/image.png") as img: 217 | result_pil = await api.process_image(img) 218 | 219 | # From a NumPy array (e.g., loaded via OpenCV) 220 | with Image.open("path/to/image.png") as img: 221 | numpy_array = np.array(img) 222 | result_numpy = await api.process_image(numpy_array) 223 | ``` 224 | 225 | #### **Getting Segmented Text Blocks** 226 | 227 | To get text segmented into logical blocks (like dialog bubbles in a comic), use the `output_format='blocks'` parameter. 228 | 229 | ```python 230 | import asyncio 231 | from chrome_lens_py import LensAPI 232 | 233 | async def process_comics(): 234 | api = LensAPI() 235 | image_source = "path/to/manga.jpg" 236 | 237 | result = await api.process_image( 238 | image_path=image_source, 239 | output_format='blocks' # Get segmented blocks instead of a single string 240 | ) 241 | 242 | # The result now contains a 'text_blocks' key 243 | text_blocks = result.get("text_blocks", []) 244 | print(f"Found {len(text_blocks)} text blocks.") 245 | 246 | for i, block in enumerate(text_blocks): 247 | print(f"\n--- Block #{i+1} ---") 248 | print(block['text']) 249 | # block also contains 'lines' and 'geometry' keys 250 | 251 | asyncio.run(process_comics()) 252 | ``` 253 | 254 | #### **Getting Individual Lines and their Geometry** 255 | 256 | To get each recognized line of text as a separate item, use the `output_format='lines'` parameter. 
257 | 258 | ```python 259 | import asyncio 260 | from chrome_lens_py import LensAPI 261 | 262 | async def process_document_lines(): 263 | api = LensAPI() 264 | image_source = "path/to/document.png" 265 | 266 | result = await api.process_image( 267 | image_path=image_source, 268 | output_format='lines' # Get individual lines with their geometry 269 | ) 270 | 271 | # The result now contains a 'line_blocks' key 272 | line_blocks = result.get("line_blocks", []) 273 | print(f"Found {len(line_blocks)} lines.") 274 | 275 | for i, line in enumerate(line_blocks): 276 | print(f"\n--- Line #{i+1} ---") 277 | print(f"Text: {line['text']}") 278 | print(f"Geometry: {line['geometry']}") 279 | 280 | asyncio.run(process_document_lines()) 281 | ``` 282 | 283 | #### **Getting Fully Detailed Text Structures** 284 | 285 | To get a complete, nested structure of paragraphs, lines, and words with geometry at each level, use `output_format='detailed'`. 286 | 287 | ```python 288 | import asyncio 289 | from chrome_lens_py import LensAPI 290 | 291 | async def process_with_details(): 292 | api = LensAPI() 293 | image_source = "path/to/document.png" 294 | 295 | result = await api.process_image( 296 | image_path=image_source, 297 | output_format='detailed' # Get the fully nested structure 298 | ) 299 | 300 | # The result now contains a 'detailed_blocks' key 301 | detailed_blocks = result.get("detailed_blocks", []) 302 | print(f"Found {len(detailed_blocks)} detailed blocks.") 303 | 304 | for i, block in enumerate(detailed_blocks): 305 | print(f"\n--- Block #{i+1} ---") 306 | print(f" Geometry: {block['geometry']}") 307 | for j, line in enumerate(block['lines']): 308 | print(f" --- Line #{j+1}: '{line['text']}' ---") 309 | for k, word in enumerate(line['words']): 310 | print(f" - Word: '{word['text']}', Geometry: {word['geometry']}") 311 | 312 | asyncio.run(process_with_details()) 313 | ``` 314 | 315 | 316 | #### **`LensAPI` Constructor** 317 | 318 | ```python 319 | api = LensAPI( 320 | api_key: str = "YOUR_API_KEY_OR_DEFAULT", 321 | client_region: Optional[str] = None, 322 | client_time_zone: Optional[str] = None, 323 | proxy: Optional[str] = None, 324 | timeout: int = 60, 325 | font_path: Optional[str] = None, 326 | font_size: Optional[int] = None, 327 | max_concurrent: int = 5 328 | ) 329 | ``` 330 | 331 | #### **`process_image` Method** 332 | 333 | ```python 334 | result: dict = await api.process_image( 335 | image_path: Any, 336 | ocr_language: Optional[str] = None, 337 | target_translation_language: Optional[str] = None, 338 | source_translation_language: Optional[str] = None, 339 | output_overlay_path: Optional[str] = None, 340 | ocr_preserve_line_breaks: bool = True, 341 | output_format: Literal['full_text', 'blocks', 'lines', 'detailed'] = 'full_text' 342 | ) 343 | ``` 344 | - **`output_format`**: Controls the structure of the OCR output. `'full_text'` (default) returns a single string in `ocr_text`. `'blocks'` returns a list in `text_blocks`. `'lines'` returns a list in `line_blocks`. `'detailed'` returns a fully nested structure in `detailed_blocks`. 345 | - **`ocr_preserve_line_breaks`**: If `False` and `output_format` is `'full_text'`, joins all OCR text into a single line. 346 | 347 | **The returned `result` dictionary contains:** 348 | - `ocr_text` (Optional[str]): The full recognized text (if `output_format='full_text'`). 349 | - `text_blocks` (Optional[List[dict]]): A list of segmented text blocks (if `output_format='blocks'`). Each block is a dict with `text`, `lines`, and `geometry`. 
350 | - `line_blocks` (Optional[List[dict]]): A list of individual text lines (if `output_format='lines'`). Each item is a dict with `text` and `geometry`.
351 | - `translated_text` (Optional[str]): The translated text, if requested.
352 | - `word_data` (List[dict]): A list of dictionaries, one per recognized word, each with its geometry.
353 | - `detailed_blocks` (Optional[List[dict]]): A list of fully structured text blocks (if `output_format='detailed'`). Each block contains lines, which in turn contain words, with geometry at every level.
354 | - `raw_response_objects`: The "raw" Protobuf response object for further analysis.
355 | 
356 | 
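#### **Converting Word Geometry to Pixels**

The geometry values in `word_data` are normalized to the image dimensions; the library's own overlay rendering computes box edges the same way, as `center ± size / 2`. Below is a small sketch for turning one entry into a pixel-space bounding box; the helper name and the example image size are illustrative, not part of the API:

```python
# Sketch: convert a word_data entry's normalized geometry to pixel coordinates.
# Assumes center_x/center_y/width/height are fractions of the image size (0.0-1.0).
def word_box_pixels(word: dict, img_width: int, img_height: int):
    geom = word["geometry"]
    x1 = (geom["center_x"] - geom["width"] / 2) * img_width
    y1 = (geom["center_y"] - geom["height"] / 2) * img_height
    x2 = (geom["center_x"] + geom["width"] / 2) * img_width
    y2 = (geom["center_y"] + geom["height"] / 2) * img_height
    # Rotated words would additionally need the angle field taken into account.
    return x1, y1, x2, y2

# Usage with a process_image result (1280x720 is just an example size):
# for word in result["word_data"]:
#     if word["geometry"]:
#         print(word["word"], word_box_pixels(word, 1280, 720))
```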
357 | 358 |
359 | ⚙️ Configuration
360 | 
361 | Settings are loaded with the following priority: **CLI Arguments > `config.json` File > Library Defaults**.
362 | 
363 | #### **`config.json`**
364 | 
365 | A `config.json` file can be placed in your system's default config directory to set persistent options.
366 | - **Linux**: `~/.config/chrome-lens-py/config.json`
367 | - **macOS**: `~/Library/Application Support/chrome-lens-py/config.json`
368 | - **Windows**: `C:\Users\<User>\.config\chrome-lens-py\config.json`
369 | 
370 | ##### **Example `config.json`**
371 | ```json
372 | {
373 |   "api_key": "OPTIONAL! If you don't know what this is, I don't recommend setting it here.",
374 |   "proxy": "socks5://127.0.0.1:9050",
375 |   "client_region": "DE",
376 |   "client_time_zone": "Europe/Berlin",
377 |   "timeout": 90,
378 |   "font_path": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
379 |   "ocr_preserve_line_breaks": true
380 | }
381 | ```
382 | 
383 | 
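##### **Creating `config.json` from a Script**

If you prefer to bootstrap the config file programmatically, here is a minimal sketch (shown for the Linux path; substitute the macOS or Windows directory from the list above):

```python
import json
from pathlib import Path

# Sketch: write persistent settings to the Linux config location listed above.
config_dir = Path.home() / ".config" / "chrome-lens-py"
config_dir.mkdir(parents=True, exist_ok=True)

settings = {
    "client_region": "DE",
    "client_time_zone": "Europe/Berlin",
    "timeout": 90,
}
(config_dir / "config.json").write_text(
    json.dumps(settings, indent=2), encoding="utf-8"
)
```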
384 | 
385 | ## ShareX Integration
386 | Check [sharex.md](docs/sharex.md) for more information on how to use this library with ShareX.
387 | 
388 | ## ❤️ Support & Acknowledgments
389 | 
390 | - **OWOCR**: Greatly inspired by and based on [OWOCR](https://github.com/AuroraWright/owocr). Thank you to them for their research into the Protobuf protocol and their OCR implementation.
391 | - **Chrome Lens OCR**: For the original implementation and ideas that formed the basis of this library. ShareX support was also first tested and contributed by me to [chrome-lens-ocr](https://github.com/dimdenGD/chrome-lens-ocr).
392 | - **AI Collaboration**: A significant portion of the v3.0 code, including the architectural refactor, asynchronous implementation, and Protobuf integration, was developed in collaboration with an advanced AI assistant.
393 | - **GOOGLE**: For the convenient and high-quality Lens technology.
394 | - **Support the Author**: If you find this library useful, you can support the author on **[Boosty](https://boosty.to/pinus)**.
395 | 
396 | ## Star History
397 | 
398 | [![Star History Chart](https://api.star-history.com/svg?repos=bropines/chrome-lens-py&type=Date)](https://www.star-history.com/#bropines/chrome-lens-py&Date)
399 | 
400 | ### Disclaimer
401 | 
402 | This project is intended for educational and experimental purposes only. Use of Google's services must comply with their Terms of Service. The author is not responsible for any misuse of this software.
--------------------------------------------------------------------------------
/README_RU.md:
--------------------------------------------------------------------------------
1 | # Chrome Lens API для Python
2 | 
3 | [English](/README.md) | **Русский**
4 | 
5 | [![PyPI version](https://badge.fury.io/py/chrome-lens-py.svg)](https://badge.fury.io/py/chrome-lens-py)
6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7 | [![Python versions](https://img.shields.io/pypi/pyversions/chrome-lens-py.svg)](https://pypi.org/project/chrome-lens-py)
8 | [![Downloads](https://static.pepy.tech/badge/chrome-lens-py)](https://pepy.tech/project/chrome-lens-py)
9 | 
10 | > [!IMPORTANT]
11 | > **Масштабное обновление (Версия 3.1.0+)**
12 | > Библиотека была полностью переписана с нуля. Теперь она использует современную асинхронную архитектуру (`async`/`await`) и взаимодействует напрямую с Protobuf эндпоинтом Google для значительно улучшенной надежности и производительности.
13 | > 
14 | > **Пожалуйста, обновите ваши проекты. Все вызовы API теперь являются `async`.**
15 | 
16 | > [!Warning]
17 | > Также обратите внимание, что библиотека была полностью переписана, и я мог что-то пропустить или не указать. Если вы заметили ошибку, сообщите мне в разделе "Issues"
18 | 
19 | Этот проект предоставляет мощную, асинхронную Python-библиотеку и утилиту командной строки для взаимодействия с Google Lens. Она позволяет выполнять продвинутое распознавание текста (OCR), получать сегментированные текстовые блоки (например, для комиксов), переводить текст и получать точные координаты слов.
20 | 
21 | ## ✨ Ключевые возможности
22 | 
23 | - **Современный бэкенд**: Использует официальный Protobuf-эндпоинт (`v1/crupload`) Google для получения надежных и точных результатов.
24 | - **Асинхронность и безопасность**: Построена на `asyncio` и `httpx`. Включает встроенный семафор для предотвращения злоупотреблений API и банов IP-адресов из-за чрезмерного количества одновременных запросов.
25 | - **Мощный OCR и сегментация**:
26 |   - Извлекайте текст с изображений в виде единой строки.
27 |   - Получайте текст, разделенный на логические блоки (абзацы, диалоговые окна) с их собственными координатами.
28 |   - Получайте отдельные строки текста с их собственной точной геометрией.
29 | - **Встроенный перевод**: Мгновенно переводите распознанный текст на любой поддерживаемый язык.
30 | - **Разные источники изображений**: Обрабатывайте изображения из **файла**, по **URL**, из **байтов**, объекта **PIL Image** или массива **NumPy**.
31 | - **Наложение текста**: Автоматически генерируйте и сохраняйте изображения с наложенным на них переводом (работает плохо, увы, нет времени сделать лучше).
32 | - **Функциональный CLI**: Простой, но мощный интерфейс командной строки (`lens_scan`) для быстрого использования.
33 | - **Поддержка прокси**: Полная поддержка HTTP, HTTPS и SOCKS прокси.
34 | - **Интеграция с буфером обмена**: Мгновенно копируйте результаты OCR или перевода в буфер обмена с помощью флага `--sharex`.
35 | - **Гибкая конфигурация**: Управляйте настройками через файл `config.json`, аргументы CLI или переменные окружения.
36 | 
37 | ## 🚀 Установка
38 | 
39 | Вы можете установить пакет с помощью `pip`:
40 | 
41 | ```bash
42 | pip install chrome-lens-py
43 | ```
44 | 
45 | Чтобы включить функцию копирования в буфер обмена (флаг `--sharex`), установите библиотеку с `[clipboard]` extra:
46 | 
47 | ```bash
48 | pip install "chrome-lens-py[clipboard]"
49 | ```
50 | 
51 | Или установите последнюю версию напрямую с GitHub:
52 | ```bash
53 | pip install git+https://github.com/bropines/chrome-lens-py.git
54 | ```
55 | ## 🚀 Использование
56 | 
57 | 
58 | 
59 | 🛠️ Использование CLI (`lens_scan`)
60 | 
61 | Утилита командной строки предоставляет быстрый доступ к возможностям библиотеки прямо из вашего терминала.
62 | 
63 | ```bash
64 | lens_scan <источник_изображения> [язык_ocr] [опции]
65 | ```
66 | 
67 | - **`<источник_изображения>`**: Путь к локальному файлу или URL-адрес изображения.
68 | - **`[язык_ocr]`** (опционально): Код языка в формате BCP 47 для OCR (например, 'en', 'ja'). Если не указан, API попытается определить язык автоматически.
69 | 
70 | #### **Опции**
71 | 
72 | | Флаг | Алиас | Описание |
73 | | :--- | :--- | :--- |
74 | | `--translate <язык>` | `-t` | **Перевести** распознанный текст на целевой язык (например, `en`, `ru`). |
75 | | `--translate-from <язык>` | | Указать исходный язык для перевода (иначе определяется автоматически). |
76 | | `--translate-out <путь>` | `-to` | **Сохранить** изображение с наложенным переводом по указанному пути. |
77 | | `--output-blocks` | `-b` | **Вывести текст OCR в виде сегментированных блоков** (полезно для комиксов). Несовместимо с `--get-coords` и `--output-lines`. |
78 | | `--output-lines` | `-ol` | **Вывести текст OCR в виде отдельных строк** с их геометрией. Несовместимо с `--output-blocks` и `--get-coords`. |
79 | | `--get-coords` | | Вывести распознанные слова и их координаты в формате JSON. Несовместимо с `--output-blocks` и `--output-lines`. |
80 | | `--sharex` | `-sx` | **Скопировать** результат в буфер обмена (перевод или OCR). |
81 | | `--ocr-single-line` | | Объединить весь распознанный текст в одну строку, удалив переносы. |
82 | | `--config-file <путь>` | | Путь к кастомному файлу конфигурации в формате JSON. |
83 | | `--update-config` | | Обновить файл конфигурации по умолчанию настройками из текущей команды. |
84 | | `--font <путь>` | | Путь к файлу шрифта `.ttf` для наложения текста. |
85 | | `--font-size <размер>` | | Размер шрифта для наложения (по умолчанию: 20). |
86 | | `--proxy <url>` | | URL прокси-сервера (например, `socks5://127.0.0.1:9050`). |
87 | | `--logging-level <ур>` | `-l` | Установить уровень логирования (`DEBUG`, `INFO`, `WARNING`, `ERROR`). |
88 | | `--help` | `-h` | Показать это справочное сообщение. |
89 | 
90 | #### **Примеры**
91 | 
92 | **1. Базовое распознавание (OCR) и перевод**
93 | 
94 | Автоматически определяет язык на изображении и переводит его на английский. Это самый распространенный сценарий использования.
95 | ```bash
96 | lens_scan "путь/к/вашему/изображению.png" -t en
97 | ```
98 | 
99 | ---
100 | 
101 | **2. Получение сегментированных текстовых блоков (для комиксов/манги)**
102 | 
103 | Идеально подходит для изображений с несколькими отдельными текстовыми блоками. Эта команда выводит каждый распознанный блок текста по отдельности, что отлично подходит для перевода комиксов или сложных документов.
104 | ```bash
105 | lens_scan "путь/к/манге.jpg" ja -b
106 | ```
107 | - `-b` — это короткий псевдоним для `--output-blocks`.
108 | 
109 | ---
110 | 
111 | **3. Получение отдельных строк текста**
112 | 
113 | Выводит каждую распознанную строку текста вместе с ее геометрией.
114 | ```bash
115 | lens_scan "путь/к/документу.png" --output-lines
116 | ```
117 | - `-ol` — это короткий псевдоним для `--output-lines`.
118 | 
119 | ---
120 | 
121 | **4. Получение координат всех отдельных слов**
122 | 
123 | Выводит подробный массив JSON, содержащий каждое распознанное слово и его точные геометрические данные (центр, размер, угол). Полезно для программного анализа или создания собственных наложений.
124 | ```bash 125 | lens_scan "путь/к/схеме.png" --get-coords 126 | ``` 127 | 128 | --- 129 | 130 | **5. Перевести, сохранить с наложением и скопировать в буфер обмена** 131 | 132 | Пример для продвинутых пользователей. Эта команда выполнит несколько действий: 133 | 1. Распознает текст на японском изображении. 134 | 2. Переведет его на русский. 135 | 3. Сохранит новое изображение `перевод_манги.png` с наложенным на него русским текстом. 136 | 4. Скопирует итоговый перевод в буфер обмена. 137 | ```bash 138 | lens_scan "путь/к/манге.jpg" ja -t ru -to "перевод_манги.png" -sx 139 | ``` 140 | 141 | --- 142 | 143 | **6. Обработать изображение по URL и получить текст в одну строку** 144 | 145 | Загружает изображение напрямую по URL-адресу и объединяет весь распознанный текст в одну непрерывную строку, удаляя все переносы. 146 | ```bash 147 | lens_scan "https://i.imgur.com/VPd1y6b.png" en --ocr-single-line 148 | ``` 149 | 150 | --- 151 | 152 | **7. Использовать SOCKS5 прокси** 153 | 154 | Все запросы к API Google будут направляться через указанный прокси-сервер, что полезно для обеспечения конфиденциальности или обхода региональных ограничений. 155 | ```bash 156 | lens_scan "image.png" --proxy "socks5://127.0.0.1:9050" 157 | ``` 158 | 159 | 160 |
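---

**8. Пакетная обработка через скрипт**

CLI обрабатывает один источник за запуск, поэтому папку с изображениями проще всего обойти небольшим скриптом-оберткой. Набросок ниже приведен для иллюстрации и не является частью библиотеки: предполагается, что `lens_scan` доступен в `PATH`, а PNG-файлы лежат в каталоге `scans/`.

```python
import subprocess
from pathlib import Path

# Минимальная обертка над CLI lens_scan (пути условные).
# Каждое изображение распознается и переводится на английский;
# ошибка на одном файле не останавливает остальные.
for image in sorted(Path("scans").glob("*.png")):
    proc = subprocess.run(
        ["lens_scan", str(image), "-t", "en"],
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        print(f"=== {image.name} ===\n{proc.stdout}")
    else:
        print(f"Ошибка на {image.name}: {proc.stderr.strip()}")
```

Учтите, что параллельный запуск множества копий обойдет встроенный лимит одновременных запросов библиотеки, поэтому последовательный цикл безопаснее.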
161 | 162 |
163 | 👨‍💻 Программное использование (API) 164 | 165 | > [!IMPORTANT] 166 | > `LensAPI` полностью **асинхронный**. Все методы для получения данных должны вызываться с помощью `await` из `async` функции. 167 | 168 | #### **Базовый пример (Полный текст)** 169 | 170 | ```python 171 | import asyncio 172 | from chrome_lens_py import LensAPI 173 | 174 | async def main(): 175 | # Инициализируем API. Здесь можно передать прокси, регион и т.д. 176 | # По умолчанию API ключ не требуется. 177 | api = LensAPI() 178 | 179 | image_source = "путь/к/вашему/изображению.png" # Или URL, PIL Image, NumPy array 180 | 181 | try: 182 | # Обрабатываем изображение и получаем текст единой строкой 183 | result = await api.process_image( 184 | image_path=image_source, 185 | ocr_language="ja", 186 | target_translation_language="en" 187 | ) 188 | 189 | print("--- Распознанный текст (OCR) ---") 190 | print(result.get("ocr_text")) 191 | 192 | print("\n--- Переведенный текст ---") 193 | print(result.get("translated_text")) 194 | 195 | except Exception as e: 196 | print(f"Произошла ошибка: {e}") 197 | 198 | if __name__ == "__main__": 199 | asyncio.run(main()) 200 | ``` 201 | 202 | #### **Работа с разными источниками изображений** 203 | 204 | Метод `process_image` легко обрабатывает различные типы входных данных. 205 | 206 | ```python 207 | from PIL import Image 208 | import numpy as np 209 | 210 | # ... внутри async функции ... 211 | 212 | # Из URL 213 | result_url = await api.process_image("https://i.imgur.com/VPd1y6b.png") 214 | 215 | # Из объекта PIL Image 216 | with Image.open("путь/к/изображению.png") as img: 217 | result_pil = await api.process_image(img) 218 | 219 | # Из массива NumPy (например, загруженного через OpenCV) 220 | with Image.open("путь/к/изображению.png") as img: 221 | numpy_array = np.array(img) 222 | result_numpy = await api.process_image(numpy_array) 223 | ``` 224 | 225 | #### **Получение сегментированных текстовых блоков** 226 | 227 | Чтобы получить текст, разделенный на логические блоки (например, диалоговые окна в комиксе), используйте параметр `output_format='blocks'`. 228 | 229 | ```python 230 | import asyncio 231 | from chrome_lens_py import LensAPI 232 | 233 | async def process_comics(): 234 | api = LensAPI() 235 | image_source = "путь/к/манге.jpg" 236 | 237 | result = await api.process_image( 238 | image_path=image_source, 239 | output_format='blocks' # Получить сегментированные блоки вместо одной строки 240 | ) 241 | 242 | # Результат теперь содержит ключ 'text_blocks' 243 | text_blocks = result.get("text_blocks", []) 244 | print(f"Найдено {len(text_blocks)} текстовых блоков.") 245 | 246 | for i, block in enumerate(text_blocks): 247 | print(f"\n--- Блок #{i+1} ---") 248 | print(block['text']) 249 | # block также содержит ключи 'lines' и 'geometry' 250 | 251 | asyncio.run(process_comics()) 252 | ``` 253 | 254 | #### **Получение отдельных строк и их геометрии** 255 | 256 | Чтобы получить каждую распознанную строку текста как отдельный элемент, используйте параметр `output_format='lines'`. 
257 | 
258 | ```python
259 | import asyncio
260 | from chrome_lens_py import LensAPI
261 | 
262 | async def process_document_lines():
263 |     api = LensAPI()
264 |     image_source = "путь/к/документу.png"
265 | 
266 |     result = await api.process_image(
267 |         image_path=image_source,
268 |         output_format='lines'  # Получить отдельные строки с их геометрией
269 |     )
270 | 
271 |     # Результат теперь содержит ключ 'line_blocks'
272 |     line_blocks = result.get("line_blocks", [])
273 |     print(f"Найдено {len(line_blocks)} строк.")
274 | 
275 |     for i, line in enumerate(line_blocks):
276 |         print(f"\n--- Строка #{i+1} ---")
277 |         print(f"Текст: {line['text']}")
278 |         print(f"Геометрия: {line['geometry']}")
279 | 
280 | asyncio.run(process_document_lines())
281 | ```
282 | #### **Получение полностью детализированных структур текста**
283 | 
284 | Чтобы получить полную, вложенную структуру из абзацев, строк и слов с геометрией на каждом уровне, используйте `output_format='detailed'`.
285 | 
286 | ```python
287 | import asyncio
288 | from chrome_lens_py import LensAPI
289 | 
290 | async def process_with_details():
291 |     api = LensAPI()
292 |     image_source = "путь/к/документу.png"
293 | 
294 |     result = await api.process_image(
295 |         image_path=image_source,
296 |         output_format='detailed'  # Получить полностью вложенную структуру
297 |     )
298 | 
299 |     # Результат теперь содержит ключ 'detailed_blocks'
300 |     detailed_blocks = result.get("detailed_blocks", [])
301 |     print(f"Найдено {len(detailed_blocks)} детализированных блоков.")
302 | 
303 |     for i, block in enumerate(detailed_blocks):
304 |         print(f"\n--- Блок #{i+1} ---")
305 |         print(f"  Геометрия: {block['geometry']}")
306 |         for j, line in enumerate(block['lines']):
307 |             print(f"  --- Строка #{j+1}: '{line['text']}' ---")
308 |             for k, word in enumerate(line['words']):
309 |                 print(f"    - Слово: '{word['text']}', Геометрия: {word['geometry']}")
310 | 
311 | asyncio.run(process_with_details())
312 | ```
313 | 
314 | 
315 | #### **Конструктор `LensAPI`**
316 | 
317 | ```python
318 | api = LensAPI(
319 |     api_key: str = "ВАШ_API_КЛЮЧ_ИЛИ_КЛЮЧ_ПО_УМОЛЧАНИЮ",
320 |     client_region: Optional[str] = None,
321 |     client_time_zone: Optional[str] = None,
322 |     proxy: Optional[str] = None,
323 |     timeout: int = 60,
324 |     font_path: Optional[str] = None,
325 |     font_size: Optional[int] = None,
326 |     max_concurrent: int = 5
327 | )
328 | ```
329 | 
330 | #### **Метод `process_image`**
331 | 
332 | ```python
333 | result: dict = await api.process_image(
334 |     image_path: Any,
335 |     ocr_language: Optional[str] = None,
336 |     target_translation_language: Optional[str] = None,
337 |     source_translation_language: Optional[str] = None,
338 |     output_overlay_path: Optional[str] = None,
339 |     ocr_preserve_line_breaks: bool = True,
340 |     output_format: Literal['full_text', 'blocks', 'lines', 'detailed'] = 'full_text'
341 | )
342 | ```
343 | - **`output_format`**: Управляет структурой OCR-вывода. `'full_text'` (по умолчанию) возвращает одну строку в `ocr_text`. `'blocks'` возвращает список в `text_blocks`. `'lines'` возвращает список в `line_blocks`. `'detailed'` возвращает полностью вложенную структуру в `detailed_blocks`.
344 | - **`ocr_preserve_line_breaks`**: Если `False` и `output_format` равен `'full_text'`, объединяет весь текст OCR в одну строку.
345 | 
346 | **Возвращаемый словарь `result` содержит:**
347 | - `ocr_text` (Optional[str]): Полный распознанный текст (если `output_format='full_text'`).
348 | - `text_blocks` (Optional[List[dict]]): Список сегментированных текстовых блоков (если `output_format='blocks'`). Каждый блок — это словарь с ключами `text`, `lines` и `geometry`.
349 | - `line_blocks` (Optional[List[dict]]): Список отдельных текстовых строк (если `output_format='lines'`). Каждый элемент — это словарь с ключами `text` и `geometry`.
350 | - `translated_text` (Optional[str]): Переведенный текст, если был запрошен.
351 | - `word_data` (List[dict]): Список словарей для каждого распознанного слова с его геометрией.
352 | - `detailed_blocks` (Optional[List[dict]]): Список полностью структурированных текстовых блоков (если `output_format='detailed'`). Каждый блок содержит строки, которые, в свою очередь, содержат слова, с геометрией на каждом уровне.
353 | - `raw_response_objects`: "Сырой" Protobuf-объект ответа для дальнейшего анализа.
354 | 
355 | 
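#### **Преобразование геометрии слов в пиксели**

Значения геометрии в `word_data` нормализованы относительно размеров изображения; собственный код наложения в библиотеке считает границы рамки так же, как `центр ± размер / 2`. Ниже небольшой набросок для перевода одной записи в пиксельные координаты; имя функции и размер изображения в примере условны и не являются частью API:

```python
# Набросок: перевод нормализованной геометрии слова в пиксельные координаты.
# Предполагается, что center_x/center_y/width/height — доли размера изображения (0.0-1.0).
def word_box_pixels(word: dict, img_width: int, img_height: int):
    geom = word["geometry"]
    x1 = (geom["center_x"] - geom["width"] / 2) * img_width
    y1 = (geom["center_y"] - geom["height"] / 2) * img_height
    x2 = (geom["center_x"] + geom["width"] / 2) * img_width
    y2 = (geom["center_y"] + geom["height"] / 2) * img_height
    # Для повернутого текста дополнительно потребуется поле угла.
    return x1, y1, x2, y2

# Использование с результатом process_image (размер 1280x720 — просто пример):
# for word in result["word_data"]:
#     if word["geometry"]:
#         print(word["word"], word_box_pixels(word, 1280, 720))
```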
356 | 357 |
358 | ⚙️ Конфигурация
359 | 
360 | Настройки загружаются со следующим приоритетом: **Аргументы CLI > Файл `config.json` > Значения по умолчанию**.
361 | 
362 | #### **`config.json`**
363 | 
364 | Файл `config.json` можно разместить в директории конфигурации по умолчанию вашей ОС для установки постоянных опций.
365 | - **Linux**: `~/.config/chrome-lens-py/config.json`
366 | - **macOS**: `~/Library/Application Support/chrome-lens-py/config.json`
367 | - **Windows**: `C:\Users\<Пользователь>\.config\chrome-lens-py\config.json`
368 | 
369 | ##### **Пример `config.json`**
370 | ```json
371 | {
372 |   "api_key": "ОПЦИОНАЛЬНО! Если вы не знаете, что это, то не советую его здесь указывать",
373 |   "proxy": "socks5://127.0.0.1:9050",
374 |   "client_region": "DE",
375 |   "client_time_zone": "Europe/Berlin",
376 |   "timeout": 90,
377 |   "font_path": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
378 |   "ocr_preserve_line_breaks": true
379 | }
380 | ```
381 | 
382 | 
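##### **Создание `config.json` из скрипта**

Если удобнее создать файл конфигурации программно, вот минимальный набросок (показан путь для Linux; для macOS и Windows подставьте директорию из списка выше):

```python
import json
from pathlib import Path

# Набросок: записываем постоянные настройки по пути для Linux из списка выше.
config_dir = Path.home() / ".config" / "chrome-lens-py"
config_dir.mkdir(parents=True, exist_ok=True)

settings = {
    "client_region": "DE",
    "client_time_zone": "Europe/Berlin",
    "timeout": 90,
}
(config_dir / "config.json").write_text(
    json.dumps(settings, indent=2), encoding="utf-8"
)
```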
383 | 
384 | ## Интеграция ShareX
385 | Посмотрите [sharex.md](docs/sharex.md) для получения дополнительной информации о том, как использовать эту библиотеку с ShareX.
386 | 
387 | ## ❤️ Поддержка и благодарности
388 | 
389 | - **OWOCR**: Во многом вдохновлен и основан на [OWOCR](https://github.com/AuroraWright/owocr). Благодарю ребят за их ресерч Protobuf и реализацию OCR.
390 | - **Chrome Lens OCR**: За изначальную реализацию и идеи, которые легли в основу этой библиотеки. Поддержка ShareX изначально была протестирована и добавлена мной в [chrome-lens-ocr](https://github.com/dimdenGD/chrome-lens-ocr).
391 | - **Совместная работа с ИИ**: Значительная часть кода версии 3.0, включая рефакторинг архитектуры, асинхронную реализацию и интеграцию с Protobuf, была разработана в сотрудничестве с продвинутым ИИ-ассистентом.
392 | - **GOOGLE**: За удобную и качественную технологию Lens.
393 | - **Поддержать автора**: Если эта библиотека оказалась вам полезной, вы можете поддержать автора на **[Boosty](https://boosty.to/pinus)**.
394 | 
395 | ## Star History
396 | 
397 | [![Star History Chart](https://api.star-history.com/svg?repos=bropines/chrome-lens-py&type=Date)](https://www.star-history.com/#bropines/chrome-lens-py&Date)
398 | 
399 | ### Отказ от ответственности
400 | 
401 | Этот проект предназначен исключительно для образовательных и экспериментальных целей. Использование сервисов Google должно соответствовать их Условиям предоставления услуг. Автор проекта не несет ответственности за любое неправомерное использование этого программного обеспечения.
--------------------------------------------------------------------------------
/src/chrome_lens_py/api.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from math import pi
4 | from pathlib import Path
5 | from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
6 | 
7 | import httpx
8 | from PIL import ImageFont
9 | 
10 | from .constants import (
11 |     DEFAULT_API_KEY,
12 |     DEFAULT_CLIENT_REGION,
13 |     DEFAULT_CLIENT_TIME_ZONE,
14 |     DEFAULT_OCR_LANG,
15 | )
16 | from .core.image_processor import (
17 |     draw_overlay_on_image,
18 |     get_word_geometry_data,
19 |     prepare_image_for_api,
20 | )
21 | from .core.protobuf_builder import create_ocr_translate_request
22 | from .core.request_handler import LensRequestHandler
23 | from .exceptions import LensException
24 | 
25 | if TYPE_CHECKING:
26 |     from .utils.lens_betterproto import (
27 |         LensOverlayServerResponse,
28 |         TextLayoutLine,
29 |         TextLayoutParagraph,
30 |         TextLayoutWord,
31 |         TranslationDataStatusCode,
32 |     )
33 | else:
34 |     from .utils.lens_betterproto import (
35 |         LensOverlayServerResponse,
36 |         TextLayoutLine,
37 |         TextLayoutParagraph,
38 |         TextLayoutWord,
39 |         TranslationDataStatusCode,
40 |     )
41 | 
42 | from .utils.font_manager import FontType, get_font
43 | 
44 | logger = logging.getLogger(__name__)
45 | 
46 | 
47 | class LensAPI:
48 |     """
49 |     Main class for interacting with the Google Lens API.
50 |     Provides methods for OCR, translation, and text block segmentation.
51 | """ 52 | 53 | def __init__( 54 | self, 55 | api_key: str = DEFAULT_API_KEY, 56 | client_region: Optional[str] = None, 57 | client_time_zone: Optional[str] = None, 58 | proxy: Optional[Union[str, Dict[str, httpx.AsyncBaseTransport]]] = None, 59 | timeout: int = 60, 60 | font_path: Optional[str] = None, 61 | font_size: Optional[int] = None, 62 | max_concurrent: int = 10, 63 | ): 64 | """ 65 | Initializes the LensAPI client. 66 | 67 | :param api_key: Your Google API key. Defaults to the library's built-in key. 68 | :param client_region: ISO 3166-1 alpha-2 country code (e.g., 'US', 'DE'). 69 | :param client_time_zone: Time zone name (e.g., 'America/New_York'). 70 | :param proxy: Proxy server URL or a dictionary for mounting transports. 71 | :param timeout: Request timeout in seconds. 72 | :param font_path: Path to a custom .ttf font file for text overlays. 73 | :param font_size: Font size for text overlays. 74 | :param max_concurrent: The maximum number of concurrent requests to prevent API abuse. Defaults to 5. 75 | """ 76 | self.request_handler = LensRequestHandler( 77 | api_key=api_key, proxy=proxy, timeout=timeout 78 | ) 79 | self.client_region = client_region 80 | self.client_time_zone = client_time_zone 81 | self.font_path = font_path 82 | self.font_size = font_size 83 | self._font_object: Optional[FontType] = None 84 | self._semaphore = asyncio.Semaphore(max_concurrent) 85 | if max_concurrent > 20: 86 | logger.warning( 87 | f"max_concurrent is set to {max_concurrent}, which is very high. " 88 | "This may lead to IP bans. Use with caution." 89 | ) 90 | 91 | def _get_font(self) -> FontType: 92 | """Lazily loads and returns the font object.""" 93 | if not self._font_object: 94 | self._font_object = get_font( 95 | font_path_override=self.font_path, font_size_override=self.font_size 96 | ) 97 | return self._font_object 98 | 99 | def _parse_line(self, line: "TextLayoutLine") -> Dict[str, Any]: 100 | """Parses a single TextLayoutLine into a structured dictionary.""" 101 | line_text = "".join( 102 | word.plain_text + (word.text_separator or "") for word in line.words 103 | ).strip() 104 | 105 | l_geom = line.geometry.bounding_box 106 | geometry_dict = { 107 | "center_x": l_geom.center_x, 108 | "center_y": l_geom.center_y, 109 | "width": l_geom.width, 110 | "height": l_geom.height, 111 | "angle_deg": l_geom.rotation_z * (180 / pi) if l_geom.rotation_z else 0.0, 112 | } 113 | 114 | return { 115 | "text": line_text, 116 | "geometry": geometry_dict, 117 | } 118 | 119 | def _parse_paragraph(self, paragraph: "TextLayoutParagraph") -> Dict[str, Any]: 120 | """Parses a single TextLayoutParagraph into a structured dictionary.""" 121 | paragraph_lines = [] 122 | for line in paragraph.lines: 123 | # Fixed Pylance issue: use 'or ""' to handle optional separator 124 | current_line_text = "".join( 125 | word.plain_text + (word.text_separator or "") for word in line.words 126 | ) 127 | paragraph_lines.append(current_line_text.strip()) 128 | 129 | full_paragraph_text = "\n".join(paragraph_lines) 130 | 131 | p_geom = paragraph.geometry.bounding_box 132 | geometry_dict = { 133 | "center_x": p_geom.center_x, 134 | "center_y": p_geom.center_y, 135 | "width": p_geom.width, 136 | "height": p_geom.height, 137 | "angle_deg": p_geom.rotation_z * (180 / pi) if p_geom.rotation_z else 0.0, 138 | } 139 | 140 | return { 141 | "text": full_paragraph_text, 142 | "lines": paragraph_lines, 143 | "geometry": geometry_dict, 144 | } 145 | 146 | def _extract_ocr_data_from_response( 147 | self, 148 | response_proto: 
"LensOverlayServerResponse", 149 | preserve_line_breaks: bool = True, 150 | output_format: Literal[ 151 | "full_text", "blocks", "lines", "detailed" 152 | ] = "full_text", 153 | ) -> Tuple[Union[str, List[Dict]], List[Dict[str, Any]]]: 154 | """ 155 | Extracts OCR data from the response. 156 | """ 157 | word_data_list: List[Dict[str, Any]] = [] 158 | if not ( 159 | response_proto.objects_response 160 | and response_proto.objects_response.text 161 | and response_proto.objects_response.text.text_layout 162 | ): 163 | return ("", []) if output_format == "full_text" else ([], []) 164 | 165 | text_layout = response_proto.objects_response.text.text_layout 166 | 167 | for paragraph in text_layout.paragraphs: 168 | for line in paragraph.lines: 169 | for word in line.words: 170 | word_data_list.append( 171 | { 172 | "word": word.plain_text, 173 | "separator": word.text_separator, 174 | "geometry": ( 175 | get_word_geometry_data(word.geometry.bounding_box) 176 | if word.geometry and word.geometry.bounding_box 177 | else None 178 | ), 179 | } 180 | ) 181 | 182 | detected_lang = getattr( 183 | response_proto.objects_response.text, "content_language", "N/A" 184 | ) 185 | logger.info( 186 | f"Extracted data for {len(word_data_list)} words. Detected language: {detected_lang}" 187 | ) 188 | 189 | if output_format == "detailed": 190 | detailed_blocks = [ 191 | self._parse_paragraph_detailed(p) for p in text_layout.paragraphs 192 | ] 193 | return detailed_blocks, word_data_list 194 | 195 | if output_format == "lines": 196 | line_blocks = [] 197 | for p in text_layout.paragraphs: 198 | for line in p.lines: 199 | line_blocks.append(self._parse_line(line)) 200 | return line_blocks, word_data_list 201 | 202 | if output_format == "blocks": 203 | text_blocks = [self._parse_paragraph(p) for p in text_layout.paragraphs] 204 | return text_blocks, word_data_list 205 | else: # 'full_text' 206 | if preserve_line_breaks: 207 | full_ocr_text = "\n".join( 208 | "\n".join(self._parse_paragraph(p)["lines"]) 209 | for p in text_layout.paragraphs 210 | ) 211 | else: 212 | text_parts = [ 213 | data["word"] + (data["separator"] or "") for data in word_data_list 214 | ] 215 | full_ocr_text = "".join(text_parts).strip() 216 | full_ocr_text = " ".join(full_ocr_text.split()) 217 | 218 | return full_ocr_text, word_data_list 219 | 220 | def _extract_translation_from_response( 221 | self, response_proto: "LensOverlayServerResponse" 222 | ) -> Optional[str]: 223 | """Extracts and consolidates all successful translations.""" 224 | all_translations = [] 225 | if ( 226 | response_proto.objects_response 227 | and response_proto.objects_response.deep_gleams 228 | ): 229 | for gleam in response_proto.objects_response.deep_gleams: 230 | if ( 231 | gleam.translation 232 | and gleam.translation.status.code 233 | == TranslationDataStatusCode.SUCCESS 234 | ): 235 | if gleam.translation.translation: 236 | all_translations.append(gleam.translation.translation) 237 | return "\n".join(all_translations).strip() or None 238 | 239 | def _parse_word_detailed(self, word: "TextLayoutWord") -> Dict[str, Any]: 240 | """Parses a single TextLayoutWord into a detailed dictionary including geometry.""" 241 | geometry_data = ( 242 | get_word_geometry_data(word.geometry.bounding_box) 243 | if word.geometry and word.geometry.bounding_box 244 | else None 245 | ) 246 | return { 247 | "text": word.plain_text, 248 | "separator": word.text_separator, 249 | "geometry": geometry_data, 250 | } 251 | 252 | def _parse_line_detailed(self, line: "TextLayoutLine") -> Dict[str, 
Any]: 253 | """Parses a TextLayoutLine into a detailed dictionary with words and geometry.""" 254 | line_text = "".join( 255 | word.plain_text + (word.text_separator or "") for word in line.words 256 | ).strip() 257 | 258 | l_geom = line.geometry.bounding_box 259 | geometry_dict = { 260 | "center_x": l_geom.center_x, 261 | "center_y": l_geom.center_y, 262 | "width": l_geom.width, 263 | "height": l_geom.height, 264 | "angle_deg": l_geom.rotation_z * (180 / pi) if l_geom.rotation_z else 0.0, 265 | } 266 | 267 | return { 268 | "text": line_text, 269 | "geometry": geometry_dict, 270 | "words": [self._parse_word_detailed(word) for word in line.words], 271 | } 272 | 273 | def _parse_paragraph_detailed( 274 | self, paragraph: "TextLayoutParagraph" 275 | ) -> Dict[str, Any]: 276 | """Parses a TextLayoutParagraph into a detailed dictionary with lines and geometry.""" 277 | full_paragraph_text = "\n".join( 278 | "".join( 279 | word.plain_text + (word.text_separator or "") for word in line.words 280 | ).strip() 281 | for line in paragraph.lines 282 | ) 283 | 284 | p_geom = paragraph.geometry.bounding_box 285 | geometry_dict = { 286 | "center_x": p_geom.center_x, 287 | "center_y": p_geom.center_y, 288 | "width": p_geom.width, 289 | "height": p_geom.height, 290 | "angle_deg": p_geom.rotation_z * (180 / pi) if p_geom.rotation_z else 0.0, 291 | } 292 | 293 | return { 294 | "text": full_paragraph_text, 295 | "geometry": geometry_dict, 296 | "lines": [self._parse_line_detailed(line) for line in paragraph.lines], 297 | } 298 | 299 | async def process_image( 300 | self, 301 | image_path: Any, 302 | ocr_language: Optional[str] = None, 303 | target_translation_language: Optional[str] = None, 304 | source_translation_language: Optional[str] = None, 305 | output_overlay_path: Optional[str] = None, 306 | new_session: bool = True, 307 | ocr_preserve_line_breaks: bool = True, 308 | output_format: Literal[ 309 | "full_text", "blocks", "lines", "detailed" 310 | ] = "full_text", 311 | ) -> Dict[str, Any]: 312 | """ 313 | Processes an image, performing OCR and optional translation. 314 | 315 | :param image_path: Path to a file (str or pathlib.Path), URL, bytes, PIL Image, or NumPy array. 316 | :param ocr_language: BCP 47 language code for OCR (e.g., 'en', 'ja'). 317 | :param target_translation_language: BCP 47 language code for translation target. 318 | :param source_translation_language: BCP 47 language code for translation source. 319 | :param output_overlay_path: Path to save the image with translated text overlaid. 320 | :param new_session: If True, starts a new server session for the request. 321 | :param ocr_preserve_line_breaks: If True and output_format is 'full_text', preserves line breaks. 322 | :param output_format: 'full_text' (default) returns a single string in 'ocr_text'. 323 | 'blocks' returns a list of dictionaries in 'text_blocks'. 324 | 'lines' returns a list of dictionaries in 'line_blocks', 325 | each representing a single recognized line with its geometry. 326 | :return: A dictionary containing the processing results. 
327 | """ 328 | # Acquire the semaphore before starting any processing 329 | async with self._semaphore: 330 | if isinstance(image_path, Path): 331 | image_path = str(image_path) 332 | 333 | if isinstance(image_path, str): 334 | logger.info(f"Processing image source: {image_path[:120]}...") 335 | else: 336 | logger.info( 337 | f"Processing image source of type: {type(image_path).__name__}" 338 | ) 339 | 340 | try: 341 | img_bytes, width, height, original_pil_img = ( 342 | await prepare_image_for_api(image_path) 343 | ) 344 | 345 | if new_session: 346 | self.request_handler.start_new_session() 347 | 348 | session_uuid_for_request, seq_id, img_seq_id = ( 349 | self.request_handler.get_next_sequence_ids_for_request( 350 | is_new_image_payload=new_session 351 | ) 352 | ) 353 | 354 | proto_payload, uuid_for_this_request = create_ocr_translate_request( 355 | image_bytes=img_bytes, 356 | width=width, 357 | height=height, 358 | ocr_language=ocr_language or DEFAULT_OCR_LANG, 359 | target_translation_language=target_translation_language, 360 | source_translation_language=source_translation_language, 361 | client_region=self.client_region or DEFAULT_CLIENT_REGION, 362 | client_time_zone=self.client_time_zone or DEFAULT_CLIENT_TIME_ZONE, 363 | session_uuid=session_uuid_for_request, 364 | sequence_id=seq_id, 365 | image_sequence_id=img_seq_id, 366 | routing_info=( 367 | self.request_handler.last_cluster_info.routing_info 368 | if self.request_handler.last_cluster_info 369 | else None 370 | ), 371 | ) 372 | 373 | response_proto = await self.request_handler.send_request( 374 | proto_payload, request_uuid_used=uuid_for_this_request 375 | ) 376 | 377 | ocr_result, word_data = self._extract_ocr_data_from_response( 378 | response_proto, ocr_preserve_line_breaks, output_format 379 | ) 380 | 381 | translated_text = ( 382 | self._extract_translation_from_response(response_proto) 383 | if target_translation_language 384 | else None 385 | ) 386 | 387 | if output_overlay_path and translated_text: 388 | word_boxes_norm = [] 389 | for data in word_data: 390 | geom = data.get("geometry") 391 | if geom: 392 | x1 = geom["center_x"] - geom["width"] / 2 393 | y1 = geom["center_y"] - geom["height"] / 2 394 | x2 = geom["center_x"] + geom["width"] / 2 395 | y2 = geom["center_y"] + geom["height"] / 2 396 | word_boxes_norm.append((x1, y1, x2, y2)) 397 | 398 | overlay_image = draw_overlay_on_image( 399 | original_pil_img, 400 | word_boxes_norm, 401 | translated_text, 402 | self._get_font(), 403 | ) 404 | try: 405 | overlay_image.save(output_overlay_path) 406 | logger.info( 407 | f"Image with overlay saved to: {output_overlay_path}" 408 | ) 409 | except Exception as e_save: 410 | logger.error( 411 | f"Error saving overlay image to '{output_overlay_path}': {e_save}" 412 | ) 413 | elif output_overlay_path: 414 | logger.warning( 415 | f"Overlay output path '{output_overlay_path}' specified, but no translated text available." 
416 | ) 417 | 418 | final_result = { 419 | "translated_text": translated_text, 420 | "word_data": word_data, 421 | "raw_response_objects": response_proto.objects_response, 422 | } 423 | 424 | if output_format == "detailed": 425 | final_result["detailed_blocks"] = ocr_result 426 | elif output_format == "blocks": 427 | final_result["text_blocks"] = ocr_result 428 | elif output_format == "lines": 429 | final_result["line_blocks"] = ocr_result 430 | else: 431 | final_result["ocr_text"] = ocr_result 432 | 433 | return final_result 434 | 435 | except LensException as e: 436 | logger.error(f"LensAPI processing error: {e}", exc_info=True) 437 | raise 438 | except Exception as e: 439 | logger.error(f"Unexpected error in LensAPI: {e}", exc_info=True) 440 | raise LensException(f"Unexpected error in LensAPI: {e}") from e 441 | -------------------------------------------------------------------------------- /experiments/reverse.py: -------------------------------------------------------------------------------- 1 | # This is the first prototype of the Google Lens API reverse. 2 | # It's simpler, and without a lot of garbage, which is suitable for your projects if you want to rewrite it into another language. 3 | import asyncio 4 | import io 5 | import json 6 | import logging 7 | import os 8 | import sys 9 | import time 10 | from urllib.parse import parse_qs, urlparse 11 | 12 | import httpx 13 | 14 | # --- JSON Parsing Setup --- 15 | try: 16 | import json5 17 | 18 | json_loader = json5.loads 19 | logging.info("Using json5 for parsing.") 20 | except ImportError: 21 | json_loader = json.loads 22 | logging.info("json5 not found, using standard json module.") 23 | 24 | # --- Logging Setup --- 25 | logging.basicConfig( 26 | level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" 27 | ) 28 | 29 | # --- Constants --- 30 | LENS_UPLOAD_ENDPOINT = "https://lens.google.com/v3/upload" 31 | LENS_METADATA_ENDPOINT = "https://lens.google.com/qfmetadata" 32 | HEADERS = { 33 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 34 | "Accept-Language": "ru", 35 | "Cache-Control": "max-age=0", 36 | "Sec-Ch-Ua": '"Not-A.Brand";v="8", "Chromium";v="135", "Google Chrome";v="135"', 37 | "Sec-Ch-Ua-Mobile": "?0", 38 | "Sec-Ch-Ua-Platform": '"Windows"', 39 | "Upgrade-Insecure-Requests": "1", 40 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", 41 | "Origin": "https://www.google.com", 42 | "Referer": "https://www.google.com/", 43 | "Sec-Fetch-Site": "same-site", 44 | "Sec-Fetch-Mode": "navigate", 45 | "Sec-Fetch-Dest": "document", 46 | "Sec-Fetch-User": "?1", 47 | "Priority": "u=0, i", 48 | } 49 | COOKIE_FILE = "cookies_lens_test.json" 50 | 51 | # --- Helper Functions --- 52 | 53 | 54 | async def read_image_data(image_path): 55 | """Reads image data from file.""" 56 | try: 57 | with open(image_path, "rb") as f: 58 | return f.read() 59 | except FileNotFoundError: 60 | logging.error(f"Image not found: {image_path}") 61 | return None 62 | except Exception as e: 63 | logging.error(f"Error reading image {image_path}: {e}") 64 | return None 65 | 66 | 67 | def extract_ids_from_url(url_string): 68 | """Extracts vsrid and lsessionid from URL.""" 69 | try: 70 | parsed_url = urlparse(url_string) 71 | query_params = parse_qs(parsed_url.query) 72 | vsrid = query_params.get("vsrid", [None])[0] 73 | lsessionid = query_params.get("lsessionid", [None])[0] 
74 | return vsrid, lsessionid 75 | except Exception as e: 76 | logging.error(f"Error extracting IDs from URL {url_string}: {e}") 77 | return None, None 78 | 79 | 80 | async def save_cookies(cookies, cookie_file): 81 | """Saves cookies to JSON file.""" 82 | try: 83 | cookies_dict = {} 84 | cookie_jar = getattr(cookies, "jar", cookies) 85 | if hasattr(cookie_jar, "items"): 86 | for name, value in cookie_jar.items(): 87 | if isinstance(value, str): 88 | cookies_dict[name] = value 89 | elif hasattr(cookie_jar, "__iter__"): 90 | for cookie in cookie_jar: 91 | if hasattr(cookie, "name") and hasattr(cookie, "value"): 92 | cookies_dict[cookie.name] = cookie.value 93 | else: 94 | logging.warning( 95 | f"Could not determine how to iterate cookies object: {type(cookies)}" 96 | ) 97 | return 98 | 99 | with open(cookie_file, "w") as f: 100 | json.dump(cookies_dict, f, indent=2) 101 | logging.debug(f"Cookies saved to {cookie_file}") 102 | except Exception as e: 103 | logging.error(f"Error saving cookies: {e}") 104 | 105 | 106 | async def load_cookies(cookie_file): 107 | """Loads cookies from JSON file.""" 108 | try: 109 | if os.path.exists(cookie_file): 110 | with open(cookie_file, "r") as f: 111 | cookies_dict = json.load(f) 112 | logging.debug(f"Cookies loaded from {cookie_file}") 113 | return cookies_dict 114 | except (json.JSONDecodeError, FileNotFoundError) as e: 115 | logging.warning( 116 | f"Error loading cookies from {cookie_file}: {e}. Ignoring cookies." 117 | ) 118 | except Exception as e: 119 | logging.warning( 120 | f"Unexpected error loading cookies from {cookie_file}: {e}. Ignoring cookies." 121 | ) 122 | return {} 123 | 124 | 125 | def adaptive_parse_text_and_language(metadata_json): 126 | """ 127 | Adaptively parses JSON to extract language, text blocks, and word annotations. 128 | """ 129 | language = None 130 | all_word_annotations = [] 131 | reconstructed_blocks = [] 132 | 133 | try: 134 | if not isinstance(metadata_json, list) or not metadata_json: 135 | logging.error( 136 | "Invalid JSON structure: metadata_json is not a non-empty list." 137 | ) 138 | return None, [], [] 139 | response_container = next( 140 | ( 141 | item 142 | for item in metadata_json 143 | if isinstance(item, list) 144 | and item 145 | and item[0] == "fetch_query_formulation_metadata_response" 146 | ), 147 | None, 148 | ) 149 | if response_container is None: 150 | logging.error( 151 | "Could not find 'fetch_query_formulation_metadata_response' container." 
152 | ) 153 | return None, [], [] 154 | 155 | # --- Language Extraction --- 156 | try: 157 | if len(response_container) > 2 and isinstance(response_container[2], list): 158 | lang_section = response_container[2] 159 | language = next( 160 | ( 161 | element 162 | for element in lang_section 163 | if isinstance(element, str) and len(element) == 2 164 | ), 165 | None, 166 | ) 167 | if language: 168 | logging.debug(f"Found language code: '{language}'") 169 | except (IndexError, TypeError, StopIteration): 170 | logging.warning("Could not find language code in expected structure.") 171 | 172 | # --- Text/Word Extraction --- 173 | segments_iterable = None 174 | possible_paths_to_segments_list = [ 175 | lambda rc: rc[2][0][0][0], 176 | lambda rc: rc[1][0][0][0], 177 | lambda rc: rc[2][0][0], 178 | ] 179 | path_names = ["[2][0][0][0]", "[1][0][0][0]", "[2][0][0]"] 180 | 181 | for i, path_func in enumerate(possible_paths_to_segments_list): 182 | path_name = path_names[i] 183 | try: 184 | candidate_iterable = path_func(response_container) 185 | if ( 186 | isinstance(candidate_iterable, list) 187 | and candidate_iterable 188 | and isinstance(candidate_iterable[0], list) 189 | ): 190 | try: 191 | first_segment = candidate_iterable[0] 192 | if len(first_segment) > 1 and isinstance( 193 | first_segment[1], list 194 | ): 195 | if ( 196 | first_segment[1] 197 | and isinstance(first_segment[1][0], list) 198 | and len(first_segment[1][0]) > 0 199 | and isinstance(first_segment[1][0][0], list) 200 | ): 201 | segments_iterable = candidate_iterable 202 | logging.debug( 203 | f"Segments list identified at path ending with {path_name}" 204 | ) 205 | break 206 | except (IndexError, TypeError): 207 | pass 208 | except (IndexError, TypeError): 209 | pass 210 | 211 | if segments_iterable is None: 212 | logging.error( 213 | f"Could not identify valid text segments list using paths {path_names}." 214 | ) 215 | return language, [], [] 216 | 217 | for segment_list in segments_iterable: 218 | current_block_word_annotations = [] 219 | block_text_builder = io.StringIO() 220 | last_word_ends_with_space = False 221 | 222 | if not isinstance(segment_list, list): 223 | logging.warning( 224 | f"Skipping segment: Expected list, got {type(segment_list)}." 
225 | ) 226 | continue 227 | 228 | try: 229 | if len(segment_list) > 1 and isinstance(segment_list[1], list): 230 | word_groups_list = segment_list[1] 231 | 232 | for group_count, word_group in enumerate(word_groups_list, 1): 233 | try: 234 | if ( 235 | isinstance(word_group, list) 236 | and len(word_group) > 0 237 | and isinstance(word_group[0], list) 238 | and isinstance(word_group[0][0], list) 239 | ): 240 | 241 | word_list = word_group[0] 242 | 243 | if ( 244 | group_count > 1 245 | and block_text_builder.tell() > 0 246 | and not last_word_ends_with_space 247 | ): 248 | block_text_builder.write(" ") 249 | last_word_ends_with_space = True 250 | 251 | for word_info in word_list: 252 | try: 253 | if ( 254 | isinstance(word_info, list) 255 | and len(word_info) > 3 256 | and isinstance(word_info[1], str) 257 | and isinstance(word_info[2], str) 258 | and isinstance(word_info[3], list) 259 | and word_info[3] 260 | and isinstance(word_info[3][0], list) 261 | ): 262 | 263 | text = word_info[1] 264 | space_indicator = word_info[2] 265 | bbox = word_info[3][0] 266 | 267 | current_block_word_annotations.append( 268 | {"text": text, "bbox": bbox} 269 | ) 270 | 271 | block_text_builder.write(text) 272 | block_text_builder.write(space_indicator) 273 | last_word_ends_with_space = ( 274 | space_indicator == " " 275 | ) 276 | 277 | except (IndexError, TypeError): 278 | pass 279 | except (IndexError, TypeError): 280 | pass 281 | else: 282 | logging.warning("Word groups list structure [1] not found/invalid.") 283 | except (IndexError, TypeError): 284 | logging.error("Error processing segment structure.") 285 | except Exception as e: 286 | logging.error(f"Unexpected error processing segment: {e}") 287 | 288 | reconstructed_text = block_text_builder.getvalue().rstrip(" ") 289 | block_text_builder.close() 290 | 291 | if reconstructed_text or current_block_word_annotations: 292 | reconstructed_blocks.append(reconstructed_text) 293 | all_word_annotations.extend(current_block_word_annotations) 294 | 295 | except Exception as e: 296 | logging.error( 297 | f"Critical error during adaptive text extraction: {e}", exc_info=True 298 | ) 299 | return language, reconstructed_blocks, all_word_annotations 300 | 301 | logging.info( 302 | f"Adaptive parsing complete. Language: '{language}'. Text blocks found: {len(reconstructed_blocks)}. Total word annotations: {len(all_word_annotations)}." 
303 | ) 304 | return language, reconstructed_blocks, all_word_annotations 305 | 306 | 307 | async def scan_image(image_path): 308 | """Scans image via Google Lens, extracts text, language, and coordinates.""" 309 | logging.info(f"Starting image scan: {image_path}") 310 | image_data = await read_image_data(image_path) 311 | if not image_data: 312 | return None, "Failed to read image data" 313 | 314 | filename = os.path.basename(image_path) 315 | _, ext = os.path.splitext(filename.lower()) 316 | content_type = "image/jpeg" 317 | if ext == ".png": 318 | content_type = "image/png" 319 | elif ext == ".webp": 320 | content_type = "image/webp" 321 | elif ext == ".gif": 322 | content_type = "image/gif" 323 | logging.debug(f"Using content type: {content_type}") 324 | 325 | files = {"encoded_image": (filename, image_data, content_type)} 326 | params_upload = { 327 | "hl": "ru", 328 | "re": "av", 329 | "vpw": "1903", 330 | "vph": "953", 331 | "ep": "gsbubb", 332 | "st": str(int(time.time() * 1000)), 333 | } 334 | 335 | loaded_cookies = await load_cookies(COOKIE_FILE) 336 | limits = httpx.Limits(max_keepalive_connections=5, max_connections=10) 337 | timeout = httpx.Timeout(30.0, connect=10.0) 338 | 339 | async with httpx.AsyncClient( 340 | cookies=loaded_cookies, 341 | follow_redirects=True, 342 | timeout=timeout, 343 | limits=limits, 344 | http2=True, 345 | verify=True, 346 | ) as client: 347 | try: 348 | # --- 1. Upload Image to Lens --- 349 | logging.debug(f"POST request to {LENS_UPLOAD_ENDPOINT}") 350 | response_upload = await client.post( 351 | LENS_UPLOAD_ENDPOINT, headers=HEADERS, files=files, params=params_upload 352 | ) 353 | await save_cookies(client.cookies, COOKIE_FILE) 354 | response_upload.raise_for_status() 355 | 356 | final_url = str(response_upload.url) 357 | 358 | # --- 2. Extract Session IDs from URL --- 359 | vsrid, lsessionid = extract_ids_from_url(final_url) 360 | if not vsrid or not lsessionid: 361 | logging.error( 362 | "Failed to extract vsrid or lsessionid from upload redirect URL." 363 | ) 364 | return None, "Failed to get session IDs from upload response" 365 | 366 | # --- 3. Fetch Metadata from Lens --- 367 | metadata_params = { 368 | "vsrid": vsrid, 369 | "lsessionid": lsessionid, 370 | "hl": params_upload["hl"], 371 | "qf": "CAI%3D", 372 | "st": str(int(time.time() * 1000)), 373 | "vpw": params_upload["vpw"], 374 | "vph": params_upload["vph"], 375 | "source": "lens", 376 | } 377 | metadata_headers = HEADERS.copy() 378 | metadata_headers.update( 379 | { 380 | "Accept": "*/*", 381 | "Referer": final_url, 382 | "Sec-Fetch-Site": "same-origin", 383 | "Sec-Fetch-Mode": "cors", 384 | "Sec-Fetch-Dest": "empty", 385 | "Priority": "u=1, i", 386 | } 387 | ) 388 | metadata_headers.pop("Upgrade-Insecure-Requests", None) 389 | metadata_headers.pop("Sec-Fetch-User", None) 390 | metadata_headers.pop("Cache-Control", None) 391 | metadata_headers.pop("Origin", None) 392 | 393 | metadata_url_obj = httpx.URL(LENS_METADATA_ENDPOINT, params=metadata_params) 394 | logging.debug(f"GET request to {str(metadata_url_obj)}") 395 | response_metadata = await client.get( 396 | metadata_url_obj, headers=metadata_headers 397 | ) 398 | await save_cookies(client.cookies, COOKIE_FILE) 399 | response_metadata.raise_for_status() 400 | 401 | # --- 4. 
Parse Metadata Response --- 402 | response_text = response_metadata.text 403 | if response_text.startswith(")]}'\n"): 404 | response_text = response_text[5:] 405 | elif response_text.startswith(")]}'"): 406 | response_text = response_text[4:] 407 | 408 | try: 409 | metadata_json = json_loader(response_text) 410 | 411 | # --- 5. Extract Data using Adaptive Parser --- 412 | language, reconstructed_blocks, all_word_annotations = ( 413 | adaptive_parse_text_and_language(metadata_json) 414 | ) 415 | full_text = "\n".join(reconstructed_blocks) 416 | 417 | result_data = { 418 | "text": full_text, 419 | "language": language if language else "und", 420 | "text_with_coordinates": json.dumps( 421 | all_word_annotations, ensure_ascii=False 422 | ), # JSON as string 423 | } 424 | return result_data, metadata_json 425 | 426 | except Exception as e_parse: 427 | logging.error( 428 | f"Error parsing JSON or extracting text: {e_parse}", exc_info=True 429 | ) 430 | return None, response_metadata.text 431 | 432 | except httpx.HTTPStatusError as e: 433 | logging.error( 434 | f"HTTP error: {e.response.status_code} for URL {e.request.url}" 435 | ) 436 | return None, f"HTTP Error {e.response.status_code}" 437 | except httpx.RequestError as e: 438 | logging.error(f"Request error: {e}") 439 | return None, f"Request Error: {e}" 440 | except Exception as e: 441 | logging.error(f"Unexpected error in scan_image: {e}", exc_info=True) 442 | return None, f"Unexpected Error: {e}" 443 | 444 | 445 | async def main(): 446 | if len(sys.argv) < 2: 447 | print(f"Usage: python {sys.argv[0]} ") 448 | sys.exit(1) 449 | 450 | image_path = sys.argv[1] 451 | if not os.path.isfile(image_path): 452 | print(f"Error: File not found: {image_path}") 453 | sys.exit(1) 454 | 455 | print(f"Starting Google Lens scan for: {image_path}") 456 | start_total_time = time.time() 457 | 458 | result_dict, raw_data = await scan_image(image_path) 459 | 460 | end_total_time = time.time() 461 | logging.info( 462 | f"Total scan_image execution time: {end_total_time - start_total_time:.2f} sec." 463 | ) 464 | 465 | if result_dict: 466 | print("\n--- Google Lens Scan Result ---") 467 | print( 468 | json.dumps(result_dict, indent=2, ensure_ascii=False) 469 | ) # Output result as JSON 470 | print("------------------------------") 471 | else: 472 | print("\nGoogle Lens scan failed.") 473 | logging.error(f"Scan failed. 
-------------------------------------------------------------------------------- /src/chrome_lens_py/cli/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import json 4 | import logging 5 | import os 6 | import sys 7 | 8 | from rich.console import Console 9 | from rich.logging import RichHandler 10 | from rich.table import Table 11 | from rich.text import Text 12 | 13 | from ..api import LensAPI 14 | from ..constants import ( 15 | DEFAULT_API_KEY, 16 | DEFAULT_CLIENT_REGION, 17 | DEFAULT_CLIENT_TIME_ZONE, 18 | DEFAULT_CONFIG_FILENAME, 19 | ) 20 | from ..exceptions import LensConfigError, LensException 21 | from ..utils.config_manager import ( 22 | build_app_config, 23 | get_default_config_dir, 24 | update_config_file_from_cli, 25 | ) 26 | from ..utils.general import is_image_file_supported 27 | from ..utils.sharex import copy_to_clipboard 28 | 29 | console = Console() 30 | 31 | 32 | def setup_logging(level_str: str = "WARNING"): 33 | log_level = getattr(logging, level_str.upper(), logging.WARNING) 34 | log_format = ( 35 | "[%(levelname)s] %(name)s:%(funcName)s:%(lineno)d - %(message)s" 36 | if log_level <= logging.DEBUG 37 | else "%(message)s" 38 | ) 39 | logging.basicConfig( 40 | level=log_level, 41 | format=log_format, 42 | handlers=[ 43 | RichHandler( 44 | console=console, 45 | show_time=False, 46 | show_level=log_level <= logging.INFO, 47 | show_path=log_level <= logging.DEBUG, 48 | markup=True, 49 | rich_tracebacks=True, 50 | ) 51 | ], 52 | ) 53 | if log_level > logging.DEBUG: 54 | logging.getLogger("httpx").setLevel(logging.WARNING) 55 | logging.debug(f"Logging level set to {level_str.upper()}") 56 | 57 | 58 | def print_help(): 59 | console.print("\n[bold cyan]Google Lens CLI (chrome-lens-py)[/bold cyan]") 60 | console.print("Performs OCR and optional translation on an image.") 61 | 62 | table = Table(show_header=False, box=None, padding=(0, 2)) 63 | table.add_column(style="green") 64 | table.add_column() 65 | table.add_row("Usage:", "lens_scan <image_source> [ocr_lang] [options]") 66 | table.add_row("\n[bold]Arguments:[/bold]") 67 | table.add_row(" image_source", "Path to an image file, a URL, or a directory.") 68 | table.add_row( 69 | " ocr_lang", 70 | "BCP 47 language code for OCR (e.g., 'en', 'ja'). 
If omitted, auto-detection is attempted.", 71 | ) 72 | table.add_row("\n[bold]Translation Options:[/bold]") 73 | table.add_row( 74 | " -t, --translate TARGET_LANG", 75 | "Target language for translation (e.g., 'en', 'ru').", 76 | ) 77 | table.add_row( 78 | " --translate-from SOURCE_LANG", 79 | "Source language for translation (auto-detected if omitted).", 80 | ) 81 | table.add_row( 82 | " -to, --translate-out FILE_PATH", 83 | "Save the image with translated text overlaid.", 84 | ) 85 | table.add_row("\n[bold]Output and Config Options:[/bold]") 86 | table.add_row( 87 | " -b, --output-blocks", 88 | "Output OCR text as segmented blocks (useful for comics).", 89 | ) 90 | table.add_row( 91 | " -ol, --output-lines", 92 | "Output OCR text as individual lines with their geometry.", 93 | ) 94 | table.add_row( 95 | " --get-coords", 96 | "Output recognized words with their coordinates in JSON format.", 97 | ) 98 | table.add_row( 99 | " -q, --quiet", 100 | "Suppress informational messages and headers, printing only the final result data.", 101 | ) 102 | table.add_row( 103 | " -sx, --sharex", "Copy the result (translation or OCR) to the clipboard." 104 | ) 105 | table.add_row( 106 | " --ocr-single-line", 107 | "Join all OCR text into a single line (preserves line breaks by default).", 108 | ) 109 | table.add_row( 110 | " --config-file FILE_PATH", "Path to a custom JSON configuration file." 111 | ) 112 | table.add_row( 113 | " --update-config", "Update the default config file with CLI arguments." 114 | ) 115 | table.add_row(" --font FONT_PATH", "Path to a .ttf font file for the overlay.") 116 | table.add_row(" --font-size SIZE", "Font size for the overlay (default: 20).") 117 | table.add_row("\n[bold]Advanced & Debug Options:[/bold]") 118 | table.add_row(" --api-key KEY", "Google Cloud API key (overrides config).") 119 | table.add_row( 120 | " --proxy URL", 121 | "Proxy server URL (e.g., http://user:pass@host:port, socks5://host:port).", 122 | ) 123 | table.add_row(" --timeout SECONDS", "Request timeout in seconds (default: 60).") 124 | table.add_row( 125 | " --concurrency N", 126 | "Set the maximum number of concurrent requests (default: 5).", 127 | ) 128 | table.add_row( 129 | " --client-region REGION", 130 | f"Client region code (default: '{DEFAULT_CLIENT_REGION}').", 131 | ) 132 | table.add_row( 133 | " --client-time-zone TZ", 134 | f"Client time zone ID (default: '{DEFAULT_CLIENT_TIME_ZONE}').", 135 | ) 136 | table.add_row( 137 | " -l, --logging-level LEVEL", 138 | "Set logging level (DEBUG, INFO, WARNING, ERROR).", 139 | ) 140 | table.add_row(" -h, --help", "Show this help message and exit.") 141 | console.print(table) 142 | 143 | 144 | async def cli_main(): 145 | parser = argparse.ArgumentParser(description="Google Lens CLI", add_help=False) 146 | # Positional 147 | parser.add_argument( 148 | "image_source", nargs="?", help="Path to the image file, a URL, or a directory." 149 | ) 150 | parser.add_argument( 151 | "ocr_lang", nargs="?", default=None, help="BCP 47 code for OCR." 
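# Illustrative invocations (options as defined below; paths are examples):
#   lens_scan photo.jpg ja -t en    # OCR Japanese text, translate to English
#   lens_scan ./pages -b -sx        # process a directory, copy block output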
152 | ) 153 | # Translation 154 | parser.add_argument("-t", "--translate", dest="target_lang") 155 | parser.add_argument("--translate-from", dest="source_lang") 156 | parser.add_argument("-to", "--translate-out", dest="output_overlay_path") 157 | # Output & Config 158 | parser.add_argument( 159 | "-b", 160 | "--output-blocks", 161 | action="store_true", 162 | help="Output OCR text as segmented blocks.", 163 | ) 164 | parser.add_argument( 165 | "-ol", 166 | "--output-lines", 167 | action="store_true", 168 | help="Output OCR text as individual lines.", 169 | ) 170 | parser.add_argument( 171 | "--get-coords", 172 | action="store_true", 173 | help="Output word coordinates in JSON format.", 174 | ) 175 | parser.add_argument( 176 | "-q", 177 | "--quiet", 178 | action="store_true", 179 | help="Suppress informational messages, printing only result data.", 180 | ) 181 | parser.add_argument("-sx", "--sharex", action="store_true") 182 | parser.add_argument( 183 | "--ocr-single-line", 184 | action="store_false", 185 | dest="ocr_preserve_line_breaks", 186 | default=None, 187 | ) 188 | parser.add_argument("--config-file", dest="config_file_path_override") 189 | parser.add_argument("--update-config", action="store_true") 190 | parser.add_argument("--font", dest="font_path") 191 | parser.add_argument("--font-size", type=int) 192 | # Advanced 193 | parser.add_argument("--api-key") 194 | parser.add_argument("--proxy") 195 | parser.add_argument("--timeout", type=int) 196 | parser.add_argument( 197 | "--concurrency", 198 | type=int, 199 | default=5, 200 | help="Maximum number of concurrent requests.", 201 | ) 202 | parser.add_argument("--client-region") 203 | parser.add_argument("--client-time-zone") 204 | # Meta 205 | parser.add_argument("-l", "--logging-level", dest="logging_level") 206 | parser.add_argument("-h", "--help", action="store_true") 207 | 208 | args = parser.parse_args() 209 | 210 | MAX_CONCURRENCY_HARD_LIMIT = 30 211 | CONCURRENCY_WARNING_THRESHOLD = 20 212 | 213 | if args.concurrency > MAX_CONCURRENCY_HARD_LIMIT: 214 | console.print( 215 | f"[bold red]Error:[/bold red] The concurrency value cannot be greater than {MAX_CONCURRENCY_HARD_LIMIT}." 216 | ) 217 | console.print("This is a security measure to prevent IP blocking.") 218 | sys.exit(1) 219 | 220 | if args.concurrency > CONCURRENCY_WARNING_THRESHOLD: 221 | console.print( 222 | f"[bold yellow]Warning:[/bold yellow] High concurrency value ({args.concurrency}) set." 223 | ) 224 | console.print( 225 | "This may result in a temporary block by Google. Use with caution." 226 | ) 227 | 228 | if args.help: 229 | print_help() 230 | return 231 | if not args.image_source: 232 | console.print( 233 | "[bold red]Error:[/bold red] The 'image_source' argument is required.\n" 234 | ) 235 | print_help() 236 | sys.exit(1) 237 | 238 | # Validate mutually exclusive output formats 239 | output_modes = [args.output_blocks, args.get_coords, args.output_lines] 240 | if sum(output_modes) > 1: 241 | console.print( 242 | "[bold red]Error:[/bold red] --output-blocks, --output-lines, and --get-coords cannot be used together." 
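# Illustrative config.json (keys inferred from the app_config lookups below;
# every field is optional):
#   {"api_key": "...", "proxy": "socks5://host:port", "timeout": 60,
#    "client_region": "US", "logging_level": "INFO"}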
243 | ) 244 | sys.exit(1) 245 | 246 | default_config_path = os.path.join( 247 | get_default_config_dir(), DEFAULT_CONFIG_FILENAME 248 | ) 249 | config_file_to_load = args.config_file_path_override or default_config_path 250 | 251 | try: 252 | app_config = build_app_config(vars(args), config_file_to_load) 253 | except LensConfigError as e: 254 | console.print(f"[bold red]Configuration Error:[/bold red] {e}") 255 | sys.exit(1) 256 | 257 | setup_logging(app_config.get("logging_level", "WARNING")) 258 | 259 | if os.path.exists(config_file_to_load): 260 | logging.info(f"Using config file: {config_file_to_load}") 261 | elif args.config_file_path_override: 262 | logging.warning( 263 | f"Specified config file not found: {args.config_file_path_override}" 264 | ) 265 | 266 | image_sources = [] 267 | if os.path.isdir(args.image_source): 268 | if not args.quiet: 269 | console.print(f"Processing directory: [cyan]{args.image_source}[/cyan]") 270 | for filename in sorted(os.listdir(args.image_source)): 271 | full_path = os.path.join(args.image_source, filename) 272 | if is_image_file_supported(full_path): 273 | image_sources.append(full_path) 274 | if not image_sources: 275 | console.print( 276 | f"[bold red]Error:[/bold red] No supported image files found in directory '{args.image_source}'." 277 | ) 278 | sys.exit(1) 279 | else: 280 | if not is_image_file_supported(args.image_source): 281 | console.print( 282 | f"[bold red]Error:[/bold red] Source '{args.image_source}' is not a valid URL or supported image file." 283 | ) 284 | sys.exit(1) 285 | image_sources.append(args.image_source) 286 | 287 | if args.update_config: 288 | if args.config_file_path_override: 289 | console.print( 290 | "[bold yellow]Warning:[/bold yellow] --update-config only affects the default config file." 
291 | ) 292 | else: 293 | try: 294 | update_config_file_from_cli(vars(args), default_config_path) 295 | except LensConfigError as e: 296 | console.print(f"[bold red]Error updating config:[/bold red] {e}") 297 | 298 | api = LensAPI( 299 | api_key=app_config.get("api_key", DEFAULT_API_KEY), 300 | client_region=app_config.get("client_region"), 301 | client_time_zone=app_config.get("client_time_zone"), 302 | proxy=app_config.get("proxy"), 303 | timeout=app_config.get("timeout", 60), 304 | font_path=app_config.get("font_path"), 305 | font_size=app_config.get("font_size"), 306 | max_concurrent=args.concurrency, 307 | ) 308 | 309 | try: 310 | output_format = "full_text" 311 | if args.output_blocks: 312 | output_format = "blocks" 313 | elif args.output_lines: 314 | output_format = "lines" 315 | 316 | results_buffer = {} 317 | next_to_print = 0 318 | results_ready = asyncio.Condition() 319 | 320 | async def worker(queue): 321 | while True: 322 | index, path = await queue.get() 323 | try: 324 | 325 | try: 326 | result = await api.process_image( 327 | image_path=path, 328 | ocr_language=args.ocr_lang, 329 | target_translation_language=args.target_lang, 330 | source_translation_language=args.source_lang, 331 | output_overlay_path=args.output_overlay_path, 332 | ocr_preserve_line_breaks=app_config.get( 333 | "ocr_preserve_line_breaks", True 334 | ), 335 | output_format=output_format, 336 | ) 337 | except Exception as e: 338 | result = e 339 | 340 | async with results_ready: 341 | results_buffer[index] = result 342 | results_ready.notify() 343 | 344 | finally: 345 | queue.task_done() 346 | 347 | job_queue = asyncio.Queue() 348 | for i, path in enumerate(image_sources): 349 | job_queue.put_nowait((i, path)) 350 | 351 | worker_tasks = [ 352 | asyncio.create_task(worker(job_queue)) for _ in range(args.concurrency) 353 | ] 354 | 355 | while next_to_print < len(image_sources): 356 | async with results_ready: 357 | await results_ready.wait_for(lambda: next_to_print in results_buffer) 358 | 359 | result = results_buffer.pop(next_to_print) 360 | image_path = image_sources[next_to_print] 361 | 362 | if isinstance(result, Exception): 363 | console.print( 364 | f"\n- [bold red]({next_to_print + 1}/{len(image_sources)}) Error for: {os.path.basename(image_path)}[/bold red] -" 365 | ) 366 | console.print(f"[red]{result}[/red]") 367 | next_to_print += 1 368 | continue 369 | 370 | if len(image_sources) > 1 and not args.quiet: 371 | console.print( 372 | f"\n- [bold green]({next_to_print + 1}/{len(image_sources)}) Result for: {os.path.basename(image_path)}[/bold green] -" 373 | ) 374 | 375 | if args.get_coords: 376 | word_data = result.get("word_data") 377 | if not word_data: 378 | console.print("[]") 379 | next_to_print += 1 380 | continue # Continue to next image in batch 381 | 382 | processed_coords = [] 383 | for data in word_data: 384 | geom = data.get("geometry") 385 | processed_coords.append( 386 | { 387 | data["word"]: ( 388 | { 389 | "center_x": round(geom["center_x"], 4), 390 | "center_y": round(geom["center_y"], 4), 391 | "width": round(geom["width"], 4), 392 | "height": round(geom["height"], 4), 393 | "angle_deg": round(geom["angle_deg"], 2), 394 | } 395 | if geom 396 | else None 397 | ) 398 | } 399 | ) 400 | 401 | console.print( 402 | json.dumps(processed_coords, indent=2, ensure_ascii=False) 403 | ) 404 | 405 | elif args.output_lines: 406 | line_blocks = result.get("line_blocks", []) 407 | if not args.quiet: 408 | console.print( 409 | f"\n[bold green]OCR Results ({len(line_blocks)} lines):[/bold green]" 410 
| ) 411 | if not line_blocks and not args.quiet: 412 | console.print("No lines found.") 413 | 414 | for j, line in enumerate(line_blocks): 415 | if not args.quiet: 416 | console.print(f"\n--- [cyan]Line #{j+1}[/cyan] ---") 417 | console.print(Text(line.get("text", ""))) 418 | 419 | translated_text = result.get("translated_text") 420 | if translated_text: 421 | if not args.quiet: 422 | console.print( 423 | "\n[bold green]Translated Text (Full):[/bold green]" 424 | ) 425 | console.print(Text(translated_text)) 426 | 427 | elif args.output_blocks: 428 | text_blocks = result.get("text_blocks", []) 429 | if not args.quiet: 430 | console.print( 431 | f"\n[bold green]OCR Results ({len(text_blocks)} blocks):[/bold green]" 432 | ) 433 | if not text_blocks and not args.quiet: 434 | console.print("No text blocks found.") 435 | 436 | for j, block in enumerate(text_blocks): 437 | if not args.quiet: 438 | console.print(f"\n--- [cyan]Block #{j+1}[/cyan] ---") 439 | console.print(Text(block.get("text", ""))) 440 | 441 | translated_text = result.get("translated_text") 442 | if translated_text: 443 | if not args.quiet: 444 | console.print( 445 | "\n[bold green]Translated Text (Full):[/bold green]" 446 | ) 447 | console.print(Text(translated_text)) 448 | 449 | else: # Default 'full_text' output 450 | ocr_text = result.get("ocr_text") 451 | if ocr_text: 452 | if not args.quiet: 453 | console.print("\n[bold green]OCR Results:[/bold green]") 454 | console.print(Text(ocr_text)) 455 | elif not args.quiet: 456 | console.print("\n[bold green]OCR Results:[/bold green]") 457 | console.print("No OCR text found.") 458 | 459 | translated_text = result.get("translated_text") 460 | if translated_text: 461 | if not args.quiet: 462 | console.print("\n[bold green]Translated Text:[/bold green]") 463 | console.print(Text(translated_text)) 464 | 465 | translated_text = result.get("translated_text") 466 | if args.target_lang and not translated_text and not args.quiet: 467 | console.print( 468 | "\n[yellow]Translation was requested but not found in the response.[/yellow]" 469 | ) 470 | 471 | if args.output_overlay_path and translated_text: 472 | if not args.quiet: 473 | console.print( 474 | f"\nImage with overlay saved to: [cyan]{args.output_overlay_path}[/cyan]" 475 | ) 476 | else: 477 | logging.info( 478 | f"Image with overlay saved to: {args.output_overlay_path}" 479 | ) 480 | 481 | if args.sharex: 482 | source_for_copy, text_to_copy = ("", "") 483 | # Prioritize translated text for copying 484 | if args.target_lang and translated_text: 485 | text_to_copy, source_for_copy = translated_text, "Translated text" 486 | elif args.output_blocks: 487 | blocks = result.get("text_blocks", []) 488 | if blocks: 489 | text_to_copy = "\n\n".join([b.get("text", "") for b in blocks]) 490 | source_for_copy = "OCR text (blocks)" 491 | else: 492 | ocr_text = result.get("ocr_text") 493 | if ocr_text: 494 | text_to_copy, source_for_copy = ocr_text, "OCR text" 495 | 496 | if text_to_copy: 497 | if copy_to_clipboard(text_to_copy): 498 | if not args.quiet: 499 | console.print( 500 | f"\n[bold magenta]({source_for_copy} copied to clipboard)[/bold magenta]" 501 | ) 502 | else: 503 | logging.info(f"{source_for_copy} copied to clipboard") 504 | else: 505 | # This is an error/warning, so it should probably stay visible 506 | console.print( 507 | "\n[bold red]Failed to copy text. Is 'pyperclip' installed? 
" 508 | '(`pip install "chrome-lens-py[clipboard]"`)[/bold red]' 509 | ) 510 | elif not args.quiet: 511 | console.print("\n[yellow]No text available to copy.[/yellow]") 512 | 513 | next_to_print += 1 514 | 515 | await job_queue.join() 516 | for task in worker_tasks: 517 | task.cancel() 518 | await asyncio.gather(*worker_tasks, return_exceptions=True) 519 | 520 | except LensException as e: 521 | console.print(f"\n[bold red]Lens API Error:[/bold red] {e}") 522 | sys.exit(1) 523 | 524 | 525 | def run(): 526 | if sys.platform == "win32" and sys.stdout.encoding != "utf-8": 527 | try: 528 | os.system("chcp 65001 > nul") 529 | logging.debug("Set Windows console to chcp 65001 (UTF-8)") 530 | except Exception as e: 531 | print(f"Warning: Failed to set console to UTF-8 (chcp 65001). Error: {e}") 532 | try: 533 | asyncio.run(cli_main()) 534 | except KeyboardInterrupt: 535 | console.print("\n[yellow]Operation cancelled by user.[/yellow]") 536 | 537 | 538 | if __name__ == "__main__": 539 | run() 540 | -------------------------------------------------------------------------------- /experiments/test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import sys 7 | import time 8 | from urllib.parse import parse_qs, urlparse 9 | 10 | import httpx 11 | 12 | try: 13 | import json5 14 | 15 | json_loader = json5.loads 16 | except ImportError: 17 | json_loader = json.loads 18 | 19 | logging.basicConfig( 20 | level=logging.INFO, 21 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 22 | datefmt="%H:%M:%S", 23 | ) 24 | main_log = logging.getLogger("main") 25 | scan_log = logging.getLogger("scan_image") 26 | http_log = logging.getLogger("http_client") 27 | parse_log = logging.getLogger("parser") 28 | cookie_log = logging.getLogger("cookies") 29 | io_log = logging.getLogger("image_io") 30 | 31 | 32 | LENS_UPLOAD_ENDPOINT = "https://lens.google.com/v3/upload" 33 | LENS_METADATA_ENDPOINT = "https://lens.google.com/qfmetadata" 34 | HEADERS = { 35 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 36 | "Accept-Language": "ru", 37 | "Cache-Control": "max-age=0", 38 | "Sec-Ch-Ua": '"Not-A.Brand";v="8", "Chromium";v="135", "Google Chrome";v="135"', 39 | "Sec-Ch-Ua-Mobile": "?0", 40 | "Sec-Ch-Ua-Platform": '"Windows"', 41 | "Upgrade-Insecure-Requests": "1", 42 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", 43 | "X-Client-Data": "CIW2yQEIorbJAQipncoBCIH+ygEIkqHLAQiKo8sBCPWYzQEIhaDNAQji0M4BCLPTzgEI19TOAQjy1c4BCJLYzgEIwNjOAQjM2M4BGM7VzgE=", 44 | "Origin": "https://www.google.com", 45 | "Referer": "https://www.google.com/", 46 | "Sec-Fetch-Site": "same-site", 47 | "Sec-Fetch-Mode": "navigate", 48 | "Sec-Fetch-Dest": "document", 49 | "Sec-Fetch-User": "?1", 50 | "Priority": "u=0, i", 51 | "Accept-Encoding": "gzip, deflate, br", 52 | "Connection": "keep-alive", 53 | } 54 | COOKIE_FILE = "cookies_lens_test.json" 55 | 56 | 57 | async def read_image_data(image_path): 58 | """Reads image data from file.""" 59 | io_log.debug(f"Attempting to read image: {image_path}") 60 | start_time = time.perf_counter() 61 | try: 62 | with open(image_path, "rb") as f: 63 | data = f.read() 64 | end_time = time.perf_counter() 65 | io_log.debug( 66 | f"Read {len(data)} bytes from {image_path} in {end_time - start_time:.4f} sec." 
67 | ) 68 | return data 69 | except FileNotFoundError: 70 | io_log.error(f"Image not found: {image_path}") 71 | return None 72 | except Exception as e: 73 | io_log.error(f"Error reading image {image_path}: {e}", exc_info=True) 74 | return None 75 | 76 | 77 | def extract_ids_from_url(url_string): 78 | """Extracts vsrid and lsessionid from URL.""" 79 | parse_log.debug(f"Attempting to extract IDs from URL: {url_string}") 80 | start_time = time.perf_counter() 81 | try: 82 | parsed_url = urlparse(url_string) 83 | query_params = parse_qs(parsed_url.query) 84 | vsrid = query_params.get("vsrid", [None])[0] 85 | lsessionid = query_params.get("lsessionid", [None])[0] 86 | end_time = time.perf_counter() 87 | if vsrid and lsessionid: 88 | parse_log.debug( 89 | f"Extracted vsrid='{vsrid}', lsessionid='{lsessionid}' in {end_time - start_time:.4f} sec." 90 | ) 91 | else: 92 | parse_log.warning( 93 | f"Could not extract vsrid or lsessionid from URL in {end_time - start_time:.4f} sec." 94 | ) 95 | return vsrid, lsessionid 96 | except Exception as e: 97 | parse_log.error( 98 | f"Error extracting IDs from URL {url_string}: {e}", exc_info=True 99 | ) 100 | return None, None 101 | 102 | 103 | async def save_cookies(cookies, cookie_file): 104 | """Saves cookies to JSON file.""" 105 | cookie_log.debug(f"Attempting to save cookies to {cookie_file}") 106 | start_time = time.perf_counter() 107 | try: 108 | cookies_dict = {} 109 | cookie_jar = getattr(cookies, "jar", cookies) 110 | if hasattr(cookie_jar, "items"): 111 | for name, value in cookie_jar.items(): 112 | cookie_obj = cookie_jar.get(name) 113 | if cookie_obj and hasattr(cookie_obj, "value"): 114 | cookies_dict[name] = cookie_obj.value 115 | elif isinstance(value, str): 116 | cookies_dict[name] = value 117 | elif hasattr(cookie_jar, "__iter__"): 118 | for cookie in cookie_jar: 119 | if hasattr(cookie, "name") and hasattr(cookie, "value"): 120 | cookies_dict[cookie.name] = cookie.value 121 | else: 122 | cookie_log.warning( 123 | f"Could not determine how to iterate cookies object: {type(cookies)}" 124 | ) 125 | return 126 | 127 | with open(cookie_file, "w") as f: 128 | json.dump(cookies_dict, f, indent=2) 129 | end_time = time.perf_counter() 130 | cookie_log.debug( 131 | f"Cookies saved ({len(cookies_dict)} items) to {cookie_file} in {end_time - start_time:.4f} sec." 132 | ) 133 | except Exception as e: 134 | cookie_log.error(f"Error saving cookies: {e}", exc_info=True) 135 | 136 | 137 | async def load_cookies(cookie_file): 138 | """Loads cookies from JSON file.""" 139 | cookie_log.debug(f"Attempting to load cookies from {cookie_file}") 140 | start_time = time.perf_counter() 141 | try: 142 | if os.path.exists(cookie_file): 143 | with open(cookie_file, "r") as f: 144 | cookies_dict = json.load(f) 145 | end_time = time.perf_counter() 146 | cookie_log.debug( 147 | f"Cookies loaded ({len(cookies_dict)} items) from {cookie_file} in {end_time - start_time:.4f} sec." 148 | ) 149 | return cookies_dict 150 | else: 151 | cookie_log.debug(f"Cookie file {cookie_file} not found.") 152 | return {} 153 | except (json.JSONDecodeError, FileNotFoundError) as e: 154 | cookie_log.warning( 155 | f"Error loading cookies from {cookie_file}: {e}. Ignoring cookies." 156 | ) 157 | except Exception as e: 158 | cookie_log.warning( 159 | f"Unexpected error loading cookies from {cookie_file}: {e}. Ignoring cookies." 
160 | ) 161 | return {} 162 | 163 | 164 | def adaptive_parse_text_and_language(metadata_json): 165 | """ 166 | Adaptively parses JSON to extract language, text blocks, and word annotations. 167 | """ 168 | parse_log.info("Starting adaptive parsing of metadata JSON.") 169 | start_time = time.perf_counter() 170 | language = None 171 | all_word_annotations = [] 172 | reconstructed_blocks = [] 173 | 174 | try: 175 | if not isinstance(metadata_json, list) or not metadata_json: 176 | parse_log.error( 177 | "Invalid JSON structure: metadata_json is not a non-empty list." 178 | ) 179 | return None, [], [] 180 | response_container = next( 181 | ( 182 | item 183 | for item in metadata_json 184 | if isinstance(item, list) 185 | and item 186 | and item[0] == "fetch_query_formulation_metadata_response" 187 | ), 188 | None, 189 | ) 190 | if response_container is None: 191 | parse_log.error( 192 | "Could not find 'fetch_query_formulation_metadata_response' container." 193 | ) 194 | return None, [], [] 195 | parse_log.debug("'fetch_query_formulation_metadata_response' container found.") 196 | 197 | lang_start_time = time.perf_counter() 198 | try: 199 | if len(response_container) > 2 and isinstance(response_container[2], list): 200 | lang_section = response_container[2] 201 | language = next( 202 | ( 203 | element 204 | for element in lang_section 205 | if isinstance(element, str) and len(element) == 2 206 | ), 207 | None, 208 | ) 209 | if language: 210 | parse_log.debug( 211 | f"Found potential language code: '{language}' in {time.perf_counter() - lang_start_time:.4f} sec." 212 | ) 213 | else: 214 | parse_log.debug( 215 | f"No direct 2-char language code found in section [2] in {time.perf_counter() - lang_start_time:.4f} sec." 216 | ) 217 | else: 218 | parse_log.debug( 219 | f"Language section [2] not found or not a list in {time.perf_counter() - lang_start_time:.4f} sec." 220 | ) 221 | 222 | except (IndexError, TypeError, StopIteration): 223 | parse_log.warning( 224 | "Could not find language code using primary method.", exc_info=True 225 | ) 226 | 227 | parse_log.debug("Searching for text segments list...") 228 | segments_iterable = None 229 | possible_paths_to_segments_list = [ 230 | lambda rc: rc[2][0][0][0], 231 | lambda rc: rc[1][0][0][0], 232 | lambda rc: rc[2][0][0], 233 | ] 234 | path_names = ["[2][0][0][0]", "[1][0][0][0]", "[2][0][0]"] 235 | path_search_start = time.perf_counter() 236 | 237 | for i, path_func in enumerate(possible_paths_to_segments_list): 238 | path_name = path_names[i] 239 | parse_log.debug(f"Trying path ending with {path_name}...") 240 | try: 241 | candidate_iterable = path_func(response_container) 242 | if ( 243 | isinstance(candidate_iterable, list) 244 | and candidate_iterable 245 | and isinstance(candidate_iterable[0], list) 246 | ): 247 | try: 248 | first_segment = candidate_iterable[0] 249 | if len(first_segment) > 1 and isinstance( 250 | first_segment[1], list 251 | ): 252 | if ( 253 | first_segment[1] 254 | and isinstance(first_segment[1][0], list) 255 | and len(first_segment[1][0]) > 0 256 | and isinstance(first_segment[1][0][0], list) 257 | ): 258 | segments_iterable = candidate_iterable 259 | parse_log.debug( 260 | f"Segments list identified at path ending with {path_name}." 
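# Segment shape assumed by the loop below (reverse-engineered, may drift):
#   segment_list[1] -> [word_group, ...]; word_group[0] -> [word_info, ...]
#   word_info -> [_, text: str, separator: str, [bbox, ...], ...]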
261 | ) 262 | break 263 | except (IndexError, TypeError) as e_check: 264 | parse_log.debug( 265 | f"Path {path_name} candidate structure check failed: {e_check}" 266 | ) 267 | pass 268 | except (IndexError, TypeError) as e_path: 269 | parse_log.debug(f"Path {path_name} access failed: {e_path}") 270 | pass 271 | 272 | parse_log.debug( 273 | f"Path search finished in {time.perf_counter() - path_search_start:.4f} sec." 274 | ) 275 | 276 | if segments_iterable is None: 277 | parse_log.error( 278 | f"Could not identify valid text segments list using known paths {path_names}. Full structure might have changed." 279 | ) 280 | return language, [], [] 281 | 282 | parse_log.info( 283 | f"Processing {len(segments_iterable)} potential text segments..." 284 | ) 285 | # segment_processing_start = time.perf_counter() 286 | 287 | for i, segment_list in enumerate(segments_iterable): 288 | segment_start_time = time.perf_counter() 289 | current_block_word_annotations = [] 290 | block_text_builder = io.StringIO() 291 | last_word_ends_with_space = False 292 | 293 | if not isinstance(segment_list, list): 294 | parse_log.warning( 295 | f"Skipping segment #{i}: Expected list, got {type(segment_list)}." 296 | ) 297 | continue 298 | 299 | try: 300 | if len(segment_list) > 1 and isinstance(segment_list[1], list): 301 | word_groups_list = segment_list[1] 302 | parse_log.debug( 303 | f"Segment #{i}: Found {len(word_groups_list)} word groups." 304 | ) 305 | 306 | for group_count, word_group in enumerate(word_groups_list, 1): 307 | try: 308 | if ( 309 | isinstance(word_group, list) 310 | and len(word_group) > 0 311 | and isinstance(word_group[0], list) 312 | ): 313 | 314 | word_list = word_group[0] 315 | parse_log.debug( 316 | f" Group {group_count}: Found {len(word_list)} words." 317 | ) 318 | 319 | if ( 320 | group_count > 1 321 | and block_text_builder.tell() > 0 322 | and not last_word_ends_with_space 323 | ): 324 | block_text_builder.write(" ") 325 | last_word_ends_with_space = True 326 | 327 | for word_idx, word_info in enumerate(word_list): 328 | try: 329 | if ( 330 | isinstance(word_info, list) 331 | and len(word_info) > 3 332 | and isinstance(word_info[1], str) 333 | and isinstance(word_info[2], str) 334 | and isinstance(word_info[3], list) 335 | and word_info[3] 336 | and isinstance(word_info[3][0], list) 337 | ): 338 | 339 | text = word_info[1] 340 | space_indicator = word_info[2] 341 | bbox = word_info[3][0] 342 | 343 | current_block_word_annotations.append( 344 | {"text": text, "bbox": bbox} 345 | ) 346 | 347 | block_text_builder.write(text) 348 | if space_indicator == " ": 349 | block_text_builder.write( 350 | space_indicator 351 | ) 352 | last_word_ends_with_space = True 353 | else: 354 | last_word_ends_with_space = False 355 | else: 356 | parse_log.warning( 357 | f"Segment #{i}, Group {group_count}, Word {word_idx}: Unexpected word_info structure or type: {word_info}" 358 | ) 359 | 360 | except (IndexError, TypeError) as e_word: 361 | parse_log.warning( 362 | f"Segment #{i}, Group {group_count}, Word {word_idx}: Error processing word_info: {e_word}. Data: {word_info}" 363 | ) 364 | pass 365 | else: 366 | parse_log.warning( 367 | f"Segment #{i}, Group {group_count}: Unexpected word_group structure: {word_group}" 368 | ) 369 | 370 | except (IndexError, TypeError) as e_group: 371 | parse_log.warning( 372 | f"Segment #{i}, Group {group_count}: Error processing word_group: {e_group}. 
Data: {word_group}" 373 | ) 374 | pass 375 | else: 376 | parse_log.warning( 377 | f"Segment #{i}: Word groups list structure segment_list[1] not found or invalid. Segment data: {segment_list}" 378 | ) 379 | except (IndexError, TypeError) as e_segment: 380 | parse_log.error( 381 | f"Segment #{i}: Error processing segment structure: {e_segment}. Data: {segment_list}", 382 | exc_info=True, 383 | ) 384 | except Exception as e_segment_unexpected: 385 | parse_log.error( 386 | f"Segment #{i}: Unexpected error processing segment: {e_segment_unexpected}", 387 | exc_info=True, 388 | ) 389 | 390 | reconstructed_text = ( 391 | block_text_builder.getvalue().rstrip(" ") 392 | if not last_word_ends_with_space 393 | else block_text_builder.getvalue() 394 | ) 395 | block_text_builder.close() 396 | 397 | segment_end_time = time.perf_counter() 398 | parse_log.debug( 399 | f"Segment #{i} processed in {segment_end_time - segment_start_time:.4f} sec. Text length: {len(reconstructed_text)}, Annotations: {len(current_block_word_annotations)}" 400 | ) 401 | 402 | if reconstructed_text or current_block_word_annotations: 403 | reconstructed_blocks.append(reconstructed_text) 404 | all_word_annotations.extend(current_block_word_annotations) 405 | else: 406 | parse_log.debug(f"Segment #{i} resulted in no text or annotations.") 407 | 408 | except Exception as e: 409 | parse_log.error( 410 | f"Critical error during adaptive text extraction: {e}", exc_info=True 411 | ) 412 | return language, reconstructed_blocks, all_word_annotations 413 | 414 | total_parse_time = time.perf_counter() - start_time 415 | parse_log.info( 416 | f"Adaptive parsing finished in {total_parse_time:.4f} sec. Language: '{language}'. Text blocks: {len(reconstructed_blocks)}. Word annotations: {len(all_word_annotations)}." 417 | ) 418 | return language, reconstructed_blocks, all_word_annotations 419 | 420 | 421 | async def scan_image(image_path): 422 | """Scans image via Google Lens, extracts text, language, and coordinates.""" 423 | scan_log.info(f"Starting image scan process for: {image_path}") 424 | total_scan_start_time = time.perf_counter() 425 | 426 | read_start = time.perf_counter() 427 | image_data = await read_image_data(image_path) 428 | read_end = time.perf_counter() 429 | scan_log.info(f"Image read finished in {read_end - read_start:.4f} sec.") 430 | if not image_data: 431 | return None, "Failed to read image data" 432 | 433 | filename = os.path.basename(image_path) 434 | _, ext = os.path.splitext(filename.lower()) 435 | content_type = "image/jpeg" 436 | if ext == ".png": 437 | content_type = "image/png" 438 | elif ext == ".webp": 439 | content_type = "image/webp" 440 | elif ext == ".gif": 441 | content_type = "image/gif" 442 | scan_log.debug(f"Determined filename: '{filename}', content type: {content_type}") 443 | 444 | files = {"encoded_image": (filename, image_data, content_type)} 445 | params_upload = { 446 | "hl": "ru", 447 | "re": "av", 448 | "vpw": "1903", 449 | "vph": "953", 450 | "ep": "gsbubb", 451 | "st": str(int(time.time() * 1000)), 452 | } 453 | 454 | cookie_load_start = time.perf_counter() 455 | loaded_cookies = await load_cookies(COOKIE_FILE) 456 | cookie_load_end = time.perf_counter() 457 | scan_log.info( 458 | f"Cookie loading finished in {cookie_load_end - cookie_load_start:.4f} sec." 
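# Request flow (reverse-engineered): POST the image to /v3/upload, follow
# redirects, read vsrid/lsessionid from the final URL, then GET /qfmetadata
# with those IDs to fetch the recognition metadata.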
459 | ) 460 | 461 | limits = httpx.Limits(max_keepalive_connections=5, max_connections=10) 462 | timeout = httpx.Timeout(60.0, connect=15.0) 463 | http_log.debug(f"Configuring httpx client: timeout={timeout}, limits={limits}") 464 | 465 | async with httpx.AsyncClient( 466 | cookies=loaded_cookies, 467 | follow_redirects=True, 468 | timeout=timeout, 469 | limits=limits, 470 | http2=True, 471 | verify=True, 472 | ) as client: 473 | try: 474 | http_log.info(f"POST request initiated to {LENS_UPLOAD_ENDPOINT}") 475 | upload_start_time = time.perf_counter() 476 | response_upload = await client.post( 477 | LENS_UPLOAD_ENDPOINT, headers=HEADERS, files=files, params=params_upload 478 | ) 479 | upload_end_time = time.perf_counter() 480 | http_log.info( 481 | f"POST request to {LENS_UPLOAD_ENDPOINT} finished in {upload_end_time - upload_start_time:.4f} sec. " 482 | f"Status: {response_upload.status_code}. Final URL: {response_upload.url}" 483 | ) 484 | 485 | cookie_save_start = time.perf_counter() 486 | await save_cookies(client.cookies, COOKIE_FILE) 487 | cookie_save_end = time.perf_counter() 488 | http_log.debug( 489 | f"Cookies saved after upload in {cookie_save_end - cookie_save_start:.4f} sec." 490 | ) 491 | 492 | response_upload.raise_for_status() 493 | 494 | final_url = str(response_upload.url) 495 | 496 | extract_start = time.perf_counter() 497 | vsrid, lsessionid = extract_ids_from_url(final_url) 498 | extract_end = time.perf_counter() 499 | scan_log.info( 500 | f"ID extraction finished in {extract_end - extract_start:.4f} sec." 501 | ) 502 | if not vsrid or not lsessionid: 503 | scan_log.error( 504 | "Failed to extract vsrid or lsessionid from upload redirect URL." 505 | ) 506 | return None, f"Failed to get session IDs from URL: {final_url}" 507 | 508 | scan_log.info("Waiting for 1 second before metadata request...") 509 | await asyncio.sleep(1) 510 | scan_log.info("Wait finished. Proceeding with metadata request.") 511 | 512 | metadata_params = { 513 | "vsrid": vsrid, 514 | "lsessionid": lsessionid, 515 | } 516 | metadata_headers = HEADERS.copy() 517 | metadata_headers.update( 518 | { 519 | "Accept": "*/*", 520 | "Referer": final_url, 521 | "Sec-Fetch-Site": "same-origin", 522 | "Sec-Fetch-Mode": "cors", 523 | "Sec-Fetch-Dest": "empty", 524 | "Priority": "u=1, i", 525 | } 526 | ) 527 | metadata_headers.pop("Upgrade-Insecure-Requests", None) 528 | metadata_headers.pop("Sec-Fetch-User", None) 529 | metadata_headers.pop("Cache-Control", None) 530 | metadata_headers.pop("Origin", None) 531 | 532 | metadata_url_obj = httpx.URL(LENS_METADATA_ENDPOINT, params=metadata_params) 533 | metadata_url_str = str(metadata_url_obj) 534 | http_log.info( 535 | f"GET request initiated to {LENS_METADATA_ENDPOINT} (URL: {metadata_url_str})" 536 | ) 537 | http_log.debug(f"Metadata request headers: {metadata_headers}") 538 | metadata_start_time = time.perf_counter() 539 | 540 | response_metadata = await client.get( 541 | metadata_url_obj, headers=metadata_headers 542 | ) 543 | metadata_end_time = time.perf_counter() 544 | http_log.info( 545 | f"GET request to {LENS_METADATA_ENDPOINT} finished in {metadata_end_time - metadata_start_time:.4f} sec. " 546 | f"Status: {response_metadata.status_code}" 547 | ) 548 | 549 | cookie_save_start = time.perf_counter() 550 | await save_cookies(client.cookies, COOKIE_FILE) 551 | cookie_save_end = time.perf_counter() 552 | http_log.debug( 553 | f"Cookies saved after metadata fetch in {cookie_save_end - cookie_save_start:.4f} sec." 
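# The metadata body starts with Google's anti-XSSI guard, e.g.
#   ")]}'\n[["fetch_query_formulation_metadata_response", ...]]"
# which is stripped below before JSON parsing.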
554 | ) 555 | 556 | response_metadata.raise_for_status() 557 | 558 | parse_log.info("Starting metadata response processing.") 559 | process_start_time = time.perf_counter() 560 | 561 | response_text = response_metadata.text 562 | original_len = len(response_text) 563 | if response_text.startswith(")]}'\n"): 564 | response_text = response_text[5:] 565 | parse_log.debug("Removed ')]}'\\n prefix") 566 | elif response_text.startswith(")]}'"): 567 | response_text = response_text[4:] 568 | parse_log.debug("Removed ')]}' prefix") 569 | stripped_len = len(response_text) 570 | parse_log.debug(f"Response text length: {original_len} -> {stripped_len}") 571 | 572 | try: 573 | json_parse_start = time.perf_counter() 574 | metadata_json = json_loader(response_text) 575 | json_parse_end = time.perf_counter() 576 | parse_log.info( 577 | f"JSON parsing finished in {json_parse_end - json_parse_start:.4f} sec." 578 | ) 579 | 580 | # extract_start_time = time.perf_counter() 581 | language, reconstructed_blocks, all_word_annotations = ( 582 | adaptive_parse_text_and_language(metadata_json) 583 | ) 584 | # extract_end_time = time.perf_counter() 585 | 586 | full_text = "\n".join(reconstructed_blocks) 587 | 588 | result_data = { 589 | "text": full_text, 590 | "language": language if language else "und", 591 | "text_with_coordinates": json.dumps( 592 | all_word_annotations, ensure_ascii=False, indent=None 593 | ), 594 | } 595 | process_end_time = time.perf_counter() 596 | parse_log.info( 597 | f"Total metadata processing (strip + JSON parse + adaptive extract) finished in {process_end_time - process_start_time:.4f} sec." 598 | ) 599 | 600 | total_scan_end_time = time.perf_counter() 601 | scan_log.info( 602 | f"Image scan process completed successfully in {total_scan_end_time - total_scan_start_time:.4f} sec." 603 | ) 604 | return result_data, metadata_json 605 | 606 | except Exception as e_parse: 607 | parse_log.error( 608 | f"Error parsing JSON or extracting text: {e_parse}", exc_info=True 609 | ) 610 | log_snippet = ( 611 | response_text[:500] + "..." 612 | if len(response_text) > 500 613 | else response_text 614 | ) 615 | parse_log.error(f"Problematic text snippet (start): {log_snippet}") 616 | total_scan_end_time = time.perf_counter() 617 | scan_log.error( 618 | f"Image scan process failed during parsing/extraction after {total_scan_end_time - total_scan_start_time:.4f} sec." 619 | ) 620 | return None, response_metadata.text 621 | 622 | except httpx.HTTPStatusError as e: 623 | http_log.error( 624 | f"HTTP error: {e.response.status_code} for URL {e.request.url}", 625 | exc_info=True, 626 | ) 627 | try: 628 | body_snippet = ( 629 | e.response.text[:500] + "..." 630 | if len(e.response.text) > 500 631 | else e.response.text 632 | ) 633 | http_log.error(f"Response body snippet: {body_snippet}") 634 | except Exception: 635 | http_log.error("Could not read response body.") 636 | total_scan_end_time = time.perf_counter() 637 | scan_log.error( 638 | f"Image scan process failed due to HTTP error after {total_scan_end_time - total_scan_start_time:.4f} sec." 639 | ) 640 | return None, f"HTTP Error {e.response.status_code}: {e.request.url}" 641 | except httpx.RequestError as e: 642 | http_log.error(f"Request error for URL {e.request.url}: {e}", exc_info=True) 643 | total_scan_end_time = time.perf_counter() 644 | scan_log.error( 645 | f"Image scan process failed due to request error after {total_scan_end_time - total_scan_start_time:.4f} sec." 
646 | ) 647 | return None, f"Request Error: {e}" 648 | except Exception as e: 649 | scan_log.error(f"Unexpected error in scan_image: {e}", exc_info=True) 650 | total_scan_end_time = time.perf_counter() 651 | scan_log.error( 652 | f"Image scan process failed unexpectedly after {total_scan_end_time - total_scan_start_time:.4f} sec." 653 | ) 654 | return None, f"Unexpected Error: {e}" 655 | 656 | 657 | async def main(): 658 | if len(sys.argv) < 2: 659 | print(f"Usage: python {sys.argv[0]} <image_path>") 660 | sys.exit(1) 661 | 662 | image_path = sys.argv[1] 663 | if not os.path.isfile(image_path): 664 | main_log.error(f"Error: File not found: {image_path}") 665 | sys.exit(1) 666 | 667 | main_log.info("========================================") 668 | main_log.info(f"Starting Google Lens scan for: {image_path}") 669 | main_log.info( 670 | f"Using log level: {logging.getLevelName(logging.getLogger().getEffectiveLevel())}" 671 | ) 672 | main_log.info("========================================") 673 | start_total_time = time.perf_counter() 674 | 675 | result_dict, raw_data_or_error = await scan_image(image_path) 676 | 677 | end_total_time = time.perf_counter() 678 | main_log.info( 679 | f"--- Total execution time for scan_image call: {end_total_time - start_total_time:.4f} sec. ---" 680 | ) 681 | 682 | if result_dict: 683 | print("\n--- Google Lens Scan Result ---") 684 | try: 685 | print(f"Language: {result_dict.get('language', 'N/A')}") 686 | print("\nText:") 687 | print(result_dict.get("text", "N/A")) 688 | print("\nText with Coordinates (JSON String):") 689 | coords_json_str = result_dict.get("text_with_coordinates", "[]") 690 | try: 691 | coords_data = json.loads(coords_json_str) 692 | print(json.dumps(coords_data, indent=2, ensure_ascii=False)) 693 | except json.JSONDecodeError: 694 | print(coords_json_str) 695 | print("------------------------------") 696 | main_log.info("Scan successful. Results printed.") 697 | except Exception as e: 698 | main_log.error(f"Error printing results: {e}") 699 | print("\n--- Raw Result Dictionary ---") 700 | print(result_dict) 701 | else: 702 | print("\nGoogle Lens scan failed.") 703 | main_log.error( 704 | f"Scan failed. See previous logs for details. Error context/data: {raw_data_or_error}" 705 | ) 706 | 707 | 708 | if __name__ == "__main__": 709 | if sys.platform == "win32" and sys.version_info >= (3, 8): 710 | asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) 711 | elif sys.platform == "win32": 712 | pass 713 | 714 | asyncio.run(main()) 715 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/lens_betterproto.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
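# Illustrative use of the generated messages below (an added example, not
# compiler output; assumes only the betterproto runtime):
#   ctx = LensOverlayClientContext(
#       platform=Platform.WEB,
#       surface=Surface.CHROMIUM,
#       locale_context=LocaleContext(language="en", region="US", time_zone="UTC"),
#   )
#   payload = bytes(ctx)  # betterproto messages serialize via bytes()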
2 | # sources: lens_overlay_client_context.proto, lens_overlay_client_logs.proto, lens_overlay_client_platform.proto, lens_overlay_cluster_info.proto, lens_overlay_deep_gleam_data.proto, lens_overlay_document.proto, lens_overlay_filters.proto, lens_overlay_geometry.proto, lens_overlay_image_crop.proto, lens_overlay_image_data.proto, lens_overlay_interaction_request_metadata.proto, lens_overlay_knowledge_intent_query.proto, lens_overlay_knowledge_query.proto, lens_overlay_math_solver_query.proto, lens_overlay_message_set.proto, lens_overlay_overlay_object.proto, lens_overlay_payload.proto, lens_overlay_phase_latencies_metadata.proto, lens_overlay_platform.proto, lens_overlay_polygon.proto, lens_overlay_request_id.proto, lens_overlay_routing_info.proto, lens_overlay_selection_type.proto, lens_overlay_server.proto, lens_overlay_service_deps.proto, lens_overlay_stickiness_signals.proto, lens_overlay_surface.proto, lens_overlay_text.proto, lens_overlay_text_query.proto, lens_overlay_translate_stickiness_signals.proto, lens_overlay_video_context_input_params.proto, lens_overlay_video_params.proto, lens_overlay_visual_search_interaction_data.proto, lens_overlay_visual_search_interaction_log_data.proto 3 | # plugin: python-betterproto 4 | # This file has been @generated 5 | 6 | from dataclasses import dataclass 7 | from typing import List, Optional 8 | 9 | import betterproto 10 | 11 | 12 | class LensOverlayFilterType(betterproto.Enum): 13 | """Supported filter types.""" 14 | 15 | UNKNOWN_FILTER_TYPE = 0 16 | TRANSLATE = 2 17 | AUTO_FILTER = 7 18 | 19 | 20 | class Platform(betterproto.Enum): 21 | UNSPECIFIED = 0 22 | WEB = 3 23 | 24 | 25 | class Surface(betterproto.Enum): 26 | UNSPECIFIED = 0 27 | CHROMIUM = 4 28 | 29 | 30 | class LensRenderingEnvironment(betterproto.Enum): 31 | """The possible rendering environments.""" 32 | 33 | RENDERING_ENV_UNSPECIFIED = 0 34 | RENDERING_ENV_LENS_OVERLAY = 14 35 | 36 | 37 | class LensOverlayPhaseLatenciesMetadataImageType(betterproto.Enum): 38 | UNKNOWN = 0 39 | JPEG = 1 40 | PNG = 2 41 | WEBP = 3 42 | 43 | 44 | class LensOverlayClientLogsLensOverlayEntryPoint(betterproto.Enum): 45 | UNKNOWN_ENTRY_POINT = 0 46 | APP_MENU = 1 47 | PAGE_CONTEXT_MENU = 2 48 | IMAGE_CONTEXT_MENU = 3 49 | OMNIBOX_BUTTON = 4 50 | TOOLBAR_BUTTON = 5 51 | FIND_IN_PAGE = 6 52 | 53 | 54 | class ClientPlatform(betterproto.Enum): 55 | UNSPECIFIED = 0 56 | LENS_OVERLAY = 2 57 | 58 | 59 | class CoordinateType(betterproto.Enum): 60 | """Specifies the coordinate system used for geometry protos.""" 61 | 62 | UNSPECIFIED = 0 63 | """Unspecified default value, per proto best practice.""" 64 | 65 | NORMALIZED = 1 66 | """Normalized coordinates.""" 67 | 68 | IMAGE = 2 69 | """Image pixel coordinates.""" 70 | 71 | 72 | class PolygonVertexOrdering(betterproto.Enum): 73 | """Specifies the vertex ordering.""" 74 | 75 | VERTEX_ORDERING_UNSPECIFIED = 0 76 | CLOCKWISE = 1 77 | COUNTER_CLOCKWISE = 2 78 | 79 | 80 | class WritingDirection(betterproto.Enum): 81 | """The text reading order.""" 82 | 83 | LEFT_TO_RIGHT = 0 84 | RIGHT_TO_LEFT = 1 85 | TOP_TO_BOTTOM = 2 86 | 87 | 88 | class Alignment(betterproto.Enum): 89 | """The text alignment.""" 90 | 91 | DEFAULT_LEFT_ALIGNED = 0 92 | RIGHT_ALIGNED = 1 93 | CENTER_ALIGNED = 2 94 | 95 | 96 | class TextLayoutWordType(betterproto.Enum): 97 | TEXT = 0 98 | """Printed text.""" 99 | 100 | FORMULA = 1 101 | """Formula type, including mathematical or chemical formulas.""" 102 | 103 | 104 | class TranslationDataStatusCode(betterproto.Enum): 105 | UNKNOWN = 0 106 | 
SUCCESS = 1 107 | SERVER_ERROR = 2 108 | UNSUPPORTED_LANGUAGE_PAIR = 3 109 | SAME_LANGUAGE = 4 110 | UNKNOWN_SOURCE_LANGUAGE = 5 111 | INVALID_REQUEST = 6 112 | DEADLINE_EXCEEDED = 7 113 | EMPTY_TRANSLATION = 8 114 | NO_OP_TRANSLATION = 9 115 | 116 | 117 | class TranslationDataBackgroundImageDataFileFormat(betterproto.Enum): 118 | """File format of the bytes in background_image.""" 119 | 120 | UNKNOWN = 0 121 | RAW_BYTES_RGBA = 1 122 | PNG_RGBA = 2 123 | WEBP_RGBA = 3 124 | JPEG_RGB_PNG_MASK = 4 125 | 126 | 127 | class LensOverlayInteractionRequestMetadataType(betterproto.Enum): 128 | UNKNOWN = 0 129 | TAP = 1 130 | """User's tap on the screen.""" 131 | 132 | REGION = 2 133 | """User's region selection on the screenshot.""" 134 | 135 | TEXT_SELECTION = 3 136 | """User's text selection on the screenshot.""" 137 | 138 | REGION_SEARCH = 4 139 | """User selected a bounding box to region search.""" 140 | 141 | OBJECT_FULFILLMENT = 5 142 | """Requests selection and fulfillment of a specific object.""" 143 | 144 | CONTEXTUAL_SEARCH_QUERY = 9 145 | """User sent a query in the contextual search box.""" 146 | 147 | PDF_QUERY = 10 148 | """User sent a query about a pdf.""" 149 | 150 | WEBPAGE_QUERY = 11 151 | """User sent a query about a website.""" 152 | 153 | 154 | class OverlayObjectRenderingMetadataRenderType(betterproto.Enum): 155 | DEFAULT = 0 156 | GLEAM = 1 157 | 158 | 159 | class LensOverlaySelectionType(betterproto.Enum): 160 | """Possible selection types for Lens overlay.""" 161 | 162 | UNKNOWN_SELECTION_TYPE = 0 163 | TAP_ON_EMPTY = 1 164 | SELECT_TEXT_HIGHLIGHT = 3 165 | REGION_SEARCH = 7 166 | INJECTED_IMAGE = 10 167 | TAP_ON_REGION_GLEAM = 15 168 | MULTIMODAL_SEARCH = 18 169 | SELECT_TRANSLATED_TEXT = 21 170 | TAP_ON_OBJECT = 22 171 | MULTIMODAL_SUGGEST_TYPEAHEAD = 25 172 | MULTIMODAL_SUGGEST_ZERO_PREFIX = 26 173 | TRANSLATE_CHIP = 52 174 | SYMBOLIC_MATH_OBJECT = 53 175 | 176 | 177 | class PayloadRequestType(betterproto.Enum): 178 | """The type of the request the payload is sent in.""" 179 | 180 | REQUEST_TYPE_DEFAULT = 0 181 | """Unset Request type.""" 182 | 183 | REQUEST_TYPE_PDF = 1 184 | """Request is for PDF.""" 185 | 186 | REQUEST_TYPE_EARLY_PARTIAL_PDF = 3 187 | """Request is for partial PDF upload.""" 188 | 189 | REQUEST_TYPE_WEBPAGE = 2 190 | """Request is for webpage.""" 191 | 192 | 193 | class PayloadCompressionType(betterproto.Enum): 194 | """Possible compression types for content_data.""" 195 | 196 | UNCOMPRESSED = 0 197 | """Default value. 
File is not compressed.""" 198 | 199 | ZSTD = 1 200 | """ZSTD compression.""" 201 | 202 | 203 | class LensOverlayServerErrorErrorType(betterproto.Enum): 204 | UNKNOWN_TYPE = 0 205 | MISSING_REQUEST = 1 206 | 207 | 208 | class StickinessSignalsNamespace(betterproto.Enum): 209 | UNKNOWN = 0 210 | TRANSLATE_LITE = 56 211 | EDUCATION_INPUT = 79 212 | 213 | 214 | @dataclass(eq=False, repr=False) 215 | class AppliedFilter(betterproto.Message): 216 | """Supported filter types.""" 217 | 218 | filter_type: "LensOverlayFilterType" = betterproto.enum_field(1) 219 | translate: "AppliedFilterTranslate" = betterproto.message_field( 220 | 3, group="filter_payload" 221 | ) 222 | 223 | 224 | @dataclass(eq=False, repr=False) 225 | class AppliedFilterTranslate(betterproto.Message): 226 | target_language: str = betterproto.string_field(1) 227 | source_language: str = betterproto.string_field(2) 228 | 229 | 230 | @dataclass(eq=False, repr=False) 231 | class AppliedFilters(betterproto.Message): 232 | """Supported filter types.""" 233 | 234 | filter: List["AppliedFilter"] = betterproto.message_field(1) 235 | 236 | 237 | @dataclass(eq=False, repr=False) 238 | class LensOverlayClientContext(betterproto.Message): 239 | """Context information of the client sending the request.""" 240 | 241 | platform: "Platform" = betterproto.enum_field(1) 242 | """Required. Client platform.""" 243 | 244 | surface: "Surface" = betterproto.enum_field(2) 245 | """Optional. Client surface.""" 246 | 247 | locale_context: "LocaleContext" = betterproto.message_field(4) 248 | """Required. Locale specific context.""" 249 | 250 | app_id: str = betterproto.string_field(6) 251 | """ 252 | Required. Name of the package which sends the request to Lens Frontend. 253 | """ 254 | 255 | client_filters: "AppliedFilters" = betterproto.message_field(17) 256 | """Filters that are enabled on the client side.""" 257 | 258 | rendering_context: "RenderingContext" = betterproto.message_field(20) 259 | """The rendering context info.""" 260 | 261 | client_logging_data: "ClientLoggingData" = betterproto.message_field(23) 262 | """Logging data.""" 263 | 264 | 265 | @dataclass(eq=False, repr=False) 266 | class LocaleContext(betterproto.Message): 267 | """Describes locale context.""" 268 | 269 | language: str = betterproto.string_field(1) 270 | """The BCP 47 language tag used to identify the language of the client.""" 271 | 272 | region: str = betterproto.string_field(2) 273 | """The CLDR region tag used to identify the region of the client.""" 274 | 275 | time_zone: str = betterproto.string_field(3) 276 | """The CLDR time zone ID used to identify the timezone of the client.""" 277 | 278 | 279 | @dataclass(eq=False, repr=False) 280 | class RenderingContext(betterproto.Message): 281 | rendering_environment: "LensRenderingEnvironment" = betterproto.enum_field(2) 282 | """The rendering environment.""" 283 | 284 | 285 | @dataclass(eq=False, repr=False) 286 | class ClientLoggingData(betterproto.Message): 287 | """Contains data that can be used for logging purposes.""" 288 | 289 | is_history_eligible: bool = betterproto.bool_field(1) 290 | """Whether history is enabled.""" 291 | 292 | 293 | @dataclass(eq=False, repr=False) 294 | class LensOverlayPhaseLatenciesMetadata(betterproto.Message): 295 | """Phase latency metadata for the Lens Overlay.""" 296 | 297 | phase: List["LensOverlayPhaseLatenciesMetadataPhase"] = betterproto.message_field(1) 298 | 299 | 300 | @dataclass(eq=False, repr=False) 301 | class LensOverlayPhaseLatenciesMetadataPhase(betterproto.Message): 302 | 
""" 303 | Represents a single point in time during the image preprocessing flow. 304 | """ 305 | 306 | image_downscale_data: "LensOverlayPhaseLatenciesMetadataPhaseImageDownscaleData" = ( 307 | betterproto.message_field(3, group="phase_data") 308 | ) 309 | """Data specifically only relevant for IMAGE_DOWNSCALE_END PhaseType.""" 310 | 311 | image_encode_data: "LensOverlayPhaseLatenciesMetadataPhaseImageEncodeData" = ( 312 | betterproto.message_field(4, group="phase_data") 313 | ) 314 | """Data specifically only relevant for IMAGE_ENCODE_END PhaseType.""" 315 | 316 | 317 | @dataclass(eq=False, repr=False) 318 | class LensOverlayPhaseLatenciesMetadataPhaseImageDownscaleData(betterproto.Message): 319 | original_image_size: int = betterproto.int64_field(1) 320 | """The size of the original image, in pixels.""" 321 | 322 | downscaled_image_size: int = betterproto.int64_field(2) 323 | """The size of the downscaled image, in pixels.""" 324 | 325 | 326 | @dataclass(eq=False, repr=False) 327 | class LensOverlayPhaseLatenciesMetadataPhaseImageEncodeData(betterproto.Message): 328 | original_image_type: "LensOverlayPhaseLatenciesMetadataImageType" = ( 329 | betterproto.enum_field(1) 330 | ) 331 | """ 332 | The type of the original Image. This only applies to IMAGE_ENCODE_END 333 | PhaseTypes 334 | """ 335 | 336 | encoded_image_size_bytes: int = betterproto.int64_field(2) 337 | """The bytes size of the encoded image.""" 338 | 339 | 340 | @dataclass(eq=False, repr=False) 341 | class LensOverlayClientLogs(betterproto.Message): 342 | phase_latencies_metadata: "LensOverlayPhaseLatenciesMetadata" = ( 343 | betterproto.message_field(1) 344 | ) 345 | """ 346 | The phase latency metadata for any image preprocessing required for the 347 | request. 348 | """ 349 | 350 | lens_overlay_entry_point: "LensOverlayClientLogsLensOverlayEntryPoint" = ( 351 | betterproto.enum_field(2) 352 | ) 353 | """The Lens Overlay entry point used to access lens.""" 354 | 355 | paella_id: int = betterproto.uint64_field(3) 356 | """ 357 | A unique identifier for associating events logged by lens asynchronously. 
358 | """ 359 | 360 | metrics_collection_disabled: bool = betterproto.bool_field(5) 361 | """Whether the user has disabled metrics collection.""" 362 | 363 | 364 | @dataclass(eq=False, repr=False) 365 | class LensOverlayRoutingInfo(betterproto.Message): 366 | """Information about where to route the request.""" 367 | 368 | server_address: str = betterproto.string_field(1) 369 | """Address to route the request to.""" 370 | 371 | cell_address: str = betterproto.string_field(3) 372 | """Cell to route the request to.""" 373 | 374 | blade_target: str = betterproto.string_field(2) 375 | """Blade target to route the request to.""" 376 | 377 | 378 | @dataclass(eq=False, repr=False) 379 | class LensOverlayClusterInfo(betterproto.Message): 380 | """The cluster info for a Lens Overlay session.""" 381 | 382 | server_session_id: str = betterproto.string_field(1) 383 | """ID for subsequent server requests.""" 384 | 385 | search_session_id: str = betterproto.string_field(2) 386 | """ID for subsequent search requests.""" 387 | 388 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(6) 389 | """Info used for routing subsequent requests.""" 390 | 391 | 392 | @dataclass(eq=False, repr=False) 393 | class Polygon(betterproto.Message): 394 | """Information about a polygon.""" 395 | 396 | vertex: List["PolygonVertex"] = betterproto.message_field(1) 397 | vertex_ordering: "PolygonVertexOrdering" = betterproto.enum_field(2) 398 | coordinate_type: "CoordinateType" = betterproto.enum_field(3) 399 | """Specifies the coordinate type of vertices.""" 400 | 401 | 402 | @dataclass(eq=False, repr=False) 403 | class PolygonVertex(betterproto.Message): 404 | """Represents a single vertex in the polygon.""" 405 | 406 | x: float = betterproto.float_field(1) 407 | y: float = betterproto.float_field(2) 408 | 409 | 410 | @dataclass(eq=False, repr=False) 411 | class CenterRotatedBox(betterproto.Message): 412 | """Information about a center bounding box rotated around its center.""" 413 | 414 | center_x: float = betterproto.float_field(1) 415 | center_y: float = betterproto.float_field(2) 416 | width: float = betterproto.float_field(3) 417 | height: float = betterproto.float_field(4) 418 | rotation_z: float = betterproto.float_field(5) 419 | """ 420 | Clockwise rotation around the center in radians. The rotation angle is 421 | computed before normalizing the coordinates. 422 | """ 423 | 424 | coordinate_type: "CoordinateType" = betterproto.enum_field(6) 425 | """ 426 | Specifies the coordinate type of center and size. 427 | @note default is COORDINATE_TYPE_UNSPECIFIED, please initialize this value 428 | to NORMALIZED or IMAGE for Lens detection API usage. 429 | """ 430 | 431 | 432 | @dataclass(eq=False, repr=False) 433 | class Geometry(betterproto.Message): 434 | """Geometric shape(s) used for tracking and detection.""" 435 | 436 | bounding_box: "CenterRotatedBox" = betterproto.message_field(1) 437 | """Specifies the bounding box for this geometry.""" 438 | 439 | segmentation_polygon: List["Polygon"] = betterproto.message_field(5) 440 | """ 441 | Specifies the segmentation polygon. The vertices of the outer-boundaries 442 | are in clockwise, and the ones of inner-boundaries are in counter-clockwise 443 | ordering. 444 | """ 445 | 446 | 447 | @dataclass(eq=False, repr=False) 448 | class ZoomedCrop(betterproto.Message): 449 | """ 450 | A cropped and potentially re-scaled image region, rectangular subregion of a 451 | canonical image. 
452 | """ 453 | 454 | crop: "CenterRotatedBox" = betterproto.message_field(1) 455 | """The cropped region of the parent image in parent coordinates.""" 456 | 457 | parent_width: int = betterproto.int32_field(2) 458 | """Width of the parent image.""" 459 | 460 | parent_height: int = betterproto.int32_field(3) 461 | """Height of the parent image.""" 462 | 463 | zoom: float = betterproto.float_field(4) 464 | """ 465 | The ratio of the pixel dimensions of the child image to the pixel 466 | dimensions of the 'crop' in parent coordinates. 467 | """ 468 | 469 | 470 | @dataclass(eq=False, repr=False) 471 | class Text(betterproto.Message): 472 | text_layout: "TextLayout" = betterproto.message_field(1) 473 | """Optional. Information describing the text.""" 474 | 475 | content_language: str = betterproto.string_field(2) 476 | """ 477 | Optional. Dominant content language of the text. Language 478 | code is CLDR/BCP-47. 479 | """ 480 | 481 | 482 | @dataclass(eq=False, repr=False) 483 | class TextLayout(betterproto.Message): 484 | """Nested text structure.""" 485 | 486 | paragraphs: List["TextLayoutParagraph"] = betterproto.message_field(1) 487 | """Optional. List of paragraphs in natural reading order.""" 488 | 489 | 490 | @dataclass(eq=False, repr=False) 491 | class TextLayoutWord(betterproto.Message): 492 | id: "TextEntityIdentifier" = betterproto.message_field(1) 493 | """Required. Unique id within TextLayout.""" 494 | 495 | plain_text: str = betterproto.string_field(2) 496 | """Optional. The text in a plain text.""" 497 | 498 | text_separator: Optional[str] = betterproto.string_field(3, optional=True) 499 | """ 500 | Optional. The text separator that should be appended after this word when 501 | it is concatenated with the subsequent word in the same or next 502 | line/paragraph into a single-line string. This is specified as optional 503 | because there is a distinction between the absence of a separator and 504 | the empty string as a separator. 505 | """ 506 | 507 | geometry: "Geometry" = betterproto.message_field(4) 508 | """Optional. The geometry of the word.""" 509 | 510 | type: "TextLayoutWordType" = betterproto.enum_field(5) 511 | """Optional. The type of this word.""" 512 | 513 | formula_metadata: "TextLayoutWordFormulaMetadata" = betterproto.message_field(6) 514 | """ 515 | Optional. Metadata for formulas. This is populated for entities of 516 | `type=FORMULA`. 517 | """ 518 | 519 | 520 | @dataclass(eq=False, repr=False) 521 | class TextLayoutWordFormulaMetadata(betterproto.Message): 522 | latex: str = betterproto.string_field(1) 523 | """ 524 | Optional. LaTeX representation of a formula. Can be the same as 525 | `plain_text`. Example: "\frac{2}{x}=y". The plain text 526 | representation of this is available in Word.plain_text. 527 | """ 528 | 529 | 530 | @dataclass(eq=False, repr=False) 531 | class TextLayoutLine(betterproto.Message): 532 | words: List["TextLayoutWord"] = betterproto.message_field(1) 533 | """Optional. List of words in natural reading order.""" 534 | 535 | geometry: "Geometry" = betterproto.message_field(2) 536 | """Optional. The geometry of the line.""" 537 | 538 | 539 | @dataclass(eq=False, repr=False) 540 | class TextLayoutParagraph(betterproto.Message): 541 | id: "TextEntityIdentifier" = betterproto.message_field(1) 542 | """Required. Unique id within TextLayout.""" 543 | 544 | lines: List["TextLayoutLine"] = betterproto.message_field(2) 545 | """ 546 | Optional. List of lines in natural reading order (see also 547 | `writing_direction`). 
548 | """ 549 | 550 | geometry: "Geometry" = betterproto.message_field(3) 551 | """Optional. Geometry of the paragraph.""" 552 | 553 | writing_direction: "WritingDirection" = betterproto.enum_field(4) 554 | """Optional. The text writing direction (aka reading order).""" 555 | 556 | content_language: str = betterproto.string_field(5) 557 | """ 558 | Optional. BCP-47 language code of the dominant language in this 559 | paragraph. 560 | """ 561 | 562 | 563 | @dataclass(eq=False, repr=False) 564 | class TextEntityIdentifier(betterproto.Message): 565 | id: int = betterproto.int64_field(1) 566 | """ 567 | Required. Unique entity id used to reference (and match) text entities and 568 | ranges. 569 | """ 570 | 571 | 572 | @dataclass(eq=False, repr=False) 573 | class DeepGleamData(betterproto.Message): 574 | translation: "TranslationData" = betterproto.message_field( 575 | 10, group="rendering_oneof" 576 | ) 577 | visual_object_id: List[str] = betterproto.string_field(11) 578 | 579 | 580 | @dataclass(eq=False, repr=False) 581 | class TranslationData(betterproto.Message): 582 | status: "TranslationDataStatus" = betterproto.message_field(1) 583 | target_language: str = betterproto.string_field(2) 584 | source_language: str = betterproto.string_field(3) 585 | translation: str = betterproto.string_field(4) 586 | """The translated text.""" 587 | 588 | line: List["TranslationDataLine"] = betterproto.message_field(5) 589 | writing_direction: "WritingDirection" = betterproto.enum_field(7) 590 | """The original writing direction of the source text.""" 591 | 592 | alignment: "Alignment" = betterproto.enum_field(8) 593 | justified: bool = betterproto.bool_field(9) 594 | """Whether the text is justified.""" 595 | 596 | 597 | @dataclass(eq=False, repr=False) 598 | class TranslationDataStatus(betterproto.Message): 599 | code: "TranslationDataStatusCode" = betterproto.enum_field(1) 600 | 601 | 602 | @dataclass(eq=False, repr=False) 603 | class TranslationDataTextStyle(betterproto.Message): 604 | """ 605 | Style as the aggregation of the styles of the words in the original text. 606 | """ 607 | 608 | text_color: int = betterproto.uint32_field(1) 609 | """The foreground color of text in aRGB format.""" 610 | 611 | background_primary_color: int = betterproto.uint32_field(2) 612 | """The background color of text in aRGB format.""" 613 | 614 | 615 | @dataclass(eq=False, repr=False) 616 | class TranslationDataBackgroundImageData(betterproto.Message): 617 | """Properties of the image used to inpaint the source text.""" 618 | 619 | background_image: bytes = betterproto.bytes_field(1) 620 | """ 621 | Image bytes to inpaint the source text. Contains image bytes in the 622 | format specified in file_format. 623 | """ 624 | 625 | image_width: int = betterproto.int32_field(2) 626 | """Width of background_image in pixels.""" 627 | 628 | image_height: int = betterproto.int32_field(3) 629 | """Height of background_image in pixels.""" 630 | 631 | vertical_padding: float = betterproto.float_field(4) 632 | """ 633 | Vertical padding to apply to the text box before drawing the background 634 | image. Expressed as a fraction of the text box height, i.e. 1.0 means 635 | that the height should be doubled. Half of the padding should be added on 636 | the top and half on the bottom. 637 | """ 638 | 639 | horizontal_padding: float = betterproto.float_field(5) 640 | """ 641 | Horizontal padding to apply to the text box before drawing the background 642 | image. Expressed as a fraction of the text box height. 
Half of the 643 | padding should be added on the left and half on the right. 644 | """ 645 | 646 | file_format: "TranslationDataBackgroundImageDataFileFormat" = ( 647 | betterproto.enum_field(6) 648 | ) 649 | text_mask: bytes = betterproto.bytes_field(7) 650 | """Text mask for the generated background image.""" 651 | 652 | 653 | @dataclass(eq=False, repr=False) 654 | class TranslationDataLine(betterproto.Message): 655 | start: int = betterproto.int32_field(1) 656 | """ 657 | A substring from the translation from start to end (exclusive), 658 | that needs to be distributed on this line, measured in Unicode 659 | characters. If not set, the Line doesn't have any translation. 660 | """ 661 | 662 | end: int = betterproto.int32_field(2) 663 | style: "TranslationDataTextStyle" = betterproto.message_field(3) 664 | word: List["TranslationDataLineWord"] = betterproto.message_field(5) 665 | background_image_data: "TranslationDataBackgroundImageData" = ( 666 | betterproto.message_field(9) 667 | ) 668 | """Background image data is set only when inpainting is computed.""" 669 | 670 | 671 | @dataclass(eq=False, repr=False) 672 | class TranslationDataLineWord(betterproto.Message): 673 | start: int = betterproto.int32_field(1) 674 | """ 675 | A substring from the translation from start to end (exclusive), 676 | representing a word (without separator), measured in Unicode 677 | characters. 678 | """ 679 | 680 | end: int = betterproto.int32_field(2) 681 | 682 | 683 | @dataclass(eq=False, repr=False) 684 | class LensOverlayDocument(betterproto.Message): 685 | """ 686 | Top-level PDF representation extracted using Pdfium. 687 | Next ID: 6 688 | """ 689 | 690 | pages: List["Page"] = betterproto.message_field(1) 691 | """Ordered pdf pages.""" 692 | 693 | 694 | @dataclass(eq=False, repr=False) 695 | class Page(betterproto.Message): 696 | """ 697 | Represents a single page of the PDF. 698 | Next ID: 10 699 | """ 700 | 701 | page_number: int = betterproto.int32_field(1) 702 | """Page number in the pdf (indexed starting at 1).""" 703 | 704 | text_segments: List[str] = betterproto.string_field(4) 705 | """List of text segments of the page.""" 706 | 707 | 708 | @dataclass(eq=False, repr=False) 709 | class ClientImage(betterproto.Message): 710 | """Image data from the client.""" 711 | 712 | image_content: bytes = betterproto.bytes_field(1) 713 | """Required. A byte array encoding an image.""" 714 | 715 | 716 | @dataclass(eq=False, repr=False) 717 | class ImageCrop(betterproto.Message): 718 | """User-selected / auto-detected cropped image region.""" 719 | 720 | crop_id: str = betterproto.string_field(1) 721 | """The ID of the cropped image region.""" 722 | 723 | image: "ClientImage" = betterproto.message_field(2) 724 | """The image content of the cropped image region.""" 725 | 726 | zoomed_crop: "ZoomedCrop" = betterproto.message_field(3) 727 | """The zoomed crop properties of the cropped image region.""" 728 | 729 | 730 | @dataclass(eq=False, repr=False) 731 | class ImageData(betterproto.Message): 732 | """ 733 | Data representing image. Contains image bytes or image retrieval identifier. 734 | """ 735 | 736 | payload: "ImagePayload" = betterproto.message_field(1) 737 | """Image payload to process. This contains image bytes.""" 738 | 739 | image_metadata: "ImageMetadata" = betterproto.message_field(3) 740 | """Required. 
Context of the given image.""" 741 | 742 | significant_regions: List["Geometry"] = betterproto.message_field(4) 743 | """The bounds of significant regions in the image.""" 744 | 745 | 746 | @dataclass(eq=False, repr=False) 747 | class ImagePayload(betterproto.Message): 748 | image_bytes: bytes = betterproto.bytes_field(1) 749 | """Required. Image byte array.""" 750 | 751 | 752 | @dataclass(eq=False, repr=False) 753 | class ImageMetadata(betterproto.Message): 754 | width: int = betterproto.int32_field(1) 755 | """ 756 | Required. Image width in pixels. Should reflect the actual size of 757 | image_bytes. 758 | """ 759 | 760 | height: int = betterproto.int32_field(2) 761 | """ 762 | Required. Image height in pixels. Should reflect the actual size of 763 | image_bytes. 764 | """ 765 | 766 | 767 | @dataclass(eq=False, repr=False) 768 | class TextQuery(betterproto.Message): 769 | """Contains an unstructured text query to add to an image query.""" 770 | 771 | query: str = betterproto.string_field(1) 772 | """The unstructured text query, such as "blue" or "blouse".""" 773 | 774 | is_primary: bool = betterproto.bool_field(2) 775 | 776 | 777 | @dataclass(eq=False, repr=False) 778 | class LensOverlayInteractionRequestMetadata(betterproto.Message): 779 | """Metadata associated with an interaction request.""" 780 | 781 | type: "LensOverlayInteractionRequestMetadataType" = betterproto.enum_field(1) 782 | selection_metadata: "LensOverlayInteractionRequestMetadataSelectionMetadata" = ( 783 | betterproto.message_field(2) 784 | ) 785 | query_metadata: "LensOverlayInteractionRequestMetadataQueryMetadata" = ( 786 | betterproto.message_field(4) 787 | ) 788 | 789 | 790 | @dataclass(eq=False, repr=False) 791 | class LensOverlayInteractionRequestMetadataSelectionMetadata(betterproto.Message): 792 | """ 793 | Metadata related to the selection associated with this interaction request. 794 | """ 795 | 796 | point: "LensOverlayInteractionRequestMetadataSelectionMetadataPoint" = ( 797 | betterproto.message_field(1, group="selection") 798 | ) 799 | region: "LensOverlayInteractionRequestMetadataSelectionMetadataRegion" = ( 800 | betterproto.message_field(2, group="selection") 801 | ) 802 | object: "LensOverlayInteractionRequestMetadataSelectionMetadataObject" = ( 803 | betterproto.message_field(3, group="selection") 804 | ) 805 | 806 | 807 | @dataclass(eq=False, repr=False) 808 | class LensOverlayInteractionRequestMetadataSelectionMetadataPoint(betterproto.Message): 809 | x: float = betterproto.float_field(1) 810 | y: float = betterproto.float_field(2) 811 | 812 | 813 | @dataclass(eq=False, repr=False) 814 | class LensOverlayInteractionRequestMetadataSelectionMetadataRegion(betterproto.Message): 815 | region: "CenterRotatedBox" = betterproto.message_field(1) 816 | 817 | 818 | @dataclass(eq=False, repr=False) 819 | class LensOverlayInteractionRequestMetadataSelectionMetadataObject(betterproto.Message): 820 | object_id: str = betterproto.string_field(1) 821 | geometry: "Geometry" = betterproto.message_field(2) 822 | 823 | 824 | @dataclass(eq=False, repr=False) 825 | class LensOverlayInteractionRequestMetadataQueryMetadata(betterproto.Message): 826 | """Metadata related to query.""" 827 | 828 | text_query: "TextQuery" = betterproto.message_field(2) 829 | """The text query information.""" 830 | 831 | 832 | @dataclass(eq=False, repr=False) 833 | class TranslateStickinessSignals(betterproto.Message): 834 | """ 835 | Signals specific to queries coming from translate stickiness extension. 
836 | """ 837 | 838 | translate_suppress_echo_for_sticky: bool = betterproto.bool_field(1) 839 | 840 | 841 | @dataclass(eq=False, repr=False) 842 | class FunctionCall(betterproto.Message): 843 | """A message representing the function call of an answers intent query.""" 844 | 845 | name: str = betterproto.string_field(1) 846 | """Name of this function call.""" 847 | 848 | argument: List["Argument"] = betterproto.message_field(2) 849 | """A list of arguments of this function call.""" 850 | 851 | signals: "FunctionCallSignals" = betterproto.message_field(4) 852 | """Signals at the function call level""" 853 | 854 | 855 | @dataclass(eq=False, repr=False) 856 | class FunctionCallSignals(betterproto.Message): 857 | """Signals at the function call level""" 858 | 859 | translate_stickiness_signals: "TranslateStickinessSignals" = ( 860 | betterproto.message_field(311378150) 861 | ) 862 | 863 | 864 | @dataclass(eq=False, repr=False) 865 | class Argument(betterproto.Message): 866 | """A message representing the function argument.""" 867 | 868 | name: str = betterproto.string_field(1) 869 | """Name of this argument.""" 870 | 871 | value: "ArgumentValue" = betterproto.message_field(2) 872 | """The value of this argument.""" 873 | 874 | 875 | @dataclass(eq=False, repr=False) 876 | class ArgumentValue(betterproto.Message): 877 | """A message representing the value of an argument.""" 878 | 879 | simple_value: "SimpleValue" = betterproto.message_field(3, group="value") 880 | 881 | 882 | @dataclass(eq=False, repr=False) 883 | class SimpleValue(betterproto.Message): 884 | """A message representing a simple literal value.""" 885 | 886 | string_value: str = betterproto.string_field(1, group="value") 887 | 888 | 889 | @dataclass(eq=False, repr=False) 890 | class Query(betterproto.Message): 891 | """A Query is a representation of the meaning of the user query.""" 892 | 893 | intent_query: "FunctionCall" = betterproto.message_field(56249026) 894 | 895 | 896 | @dataclass(eq=False, repr=False) 897 | class MathSolverQuery(betterproto.Message): 898 | math_input_equation: str = betterproto.string_field(3) 899 | 900 | 901 | @dataclass(eq=False, repr=False) 902 | class MessageSet(betterproto.Message): 903 | """This is proto2's version of MessageSet.""" 904 | 905 | message_set_extension: "Query" = betterproto.message_field(41401449) 906 | 907 | 908 | @dataclass(eq=False, repr=False) 909 | class OverlayObject(betterproto.Message): 910 | """Overlay Object.""" 911 | 912 | id: str = betterproto.string_field(1) 913 | """The id.""" 914 | 915 | geometry: "Geometry" = betterproto.message_field(2) 916 | """The object geometry.""" 917 | 918 | rendering_metadata: "OverlayObjectRenderingMetadata" = betterproto.message_field(8) 919 | """The rendering metadata for the object.""" 920 | 921 | interaction_properties: "OverlayObjectInteractionProperties" = ( 922 | betterproto.message_field(4) 923 | ) 924 | is_fulfilled: bool = betterproto.bool_field(9) 925 | """ 926 | Indicates to the client that this object is eligible to be an object 927 | fulfillment request. 
928 | """ 929 | 930 | 931 | @dataclass(eq=False, repr=False) 932 | class OverlayObjectRenderingMetadata(betterproto.Message): 933 | """Rendering metadata for the object.""" 934 | 935 | render_type: "OverlayObjectRenderingMetadataRenderType" = betterproto.enum_field(1) 936 | 937 | 938 | @dataclass(eq=False, repr=False) 939 | class OverlayObjectInteractionProperties(betterproto.Message): 940 | select_on_tap: bool = betterproto.bool_field(1) 941 | """Whether an object can be tapped""" 942 | 943 | 944 | @dataclass(eq=False, repr=False) 945 | class LensOverlayRequestId(betterproto.Message): 946 | """ 947 | Request Id definition to support request sequencing and state lookup. 948 | """ 949 | 950 | uuid: int = betterproto.uint64_field(1) 951 | """A unique identifier for a sequence of related Lens requests.""" 952 | 953 | sequence_id: int = betterproto.int32_field(2) 954 | """ 955 | An id to indicate the order of the current request within a sequence of 956 | requests sharing the same uuid. Starts from 1, increments by 1 if there is 957 | a new request with the same uuid. 958 | """ 959 | 960 | image_sequence_id: int = betterproto.int32_field(3) 961 | """ 962 | An id to indicate the order of image payload sent within a sequence of 963 | requests sharing the same uuid. Starts from 1, increments by 1 if there is 964 | a new request with an image payload with the same uuid. 965 | Note, region search request does not increment this id. 966 | """ 967 | 968 | analytics_id: bytes = betterproto.bytes_field(4) 969 | """ 970 | Analytics ID for the Lens request. Will be updated on the initial request 971 | and once per interaction request. 972 | """ 973 | 974 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(6) 975 | """Information about where to route the request.""" 976 | 977 | 978 | @dataclass(eq=False, repr=False) 979 | class LensOverlayRequestContext(betterproto.Message): 980 | """Request context for a Lens Overlay request.""" 981 | 982 | request_id: "LensOverlayRequestId" = betterproto.message_field(3) 983 | """Required. Identifiers for this request.""" 984 | 985 | client_context: "LensOverlayClientContext" = betterproto.message_field(4) 986 | """The client context for the request.""" 987 | 988 | 989 | @dataclass(eq=False, repr=False) 990 | class LensOverlayObjectsRequest(betterproto.Message): 991 | request_context: "LensOverlayRequestContext" = betterproto.message_field(1) 992 | """Required. Basic information and context for the request.""" 993 | 994 | image_data: "ImageData" = betterproto.message_field(3) 995 | """Required. Image Data to process.""" 996 | 997 | payload: "Payload" = betterproto.message_field(4) 998 | """ 999 | Optional. Data payload of the request. 1000 | TODO(b/359638436): Mark required when clients have migrated to use Payload 1001 | field. 
1002 | """ 1003 | 1004 | 1005 | @dataclass(eq=False, repr=False) 1006 | class LensOverlayObjectsResponse(betterproto.Message): 1007 | overlay_objects: List["OverlayObject"] = betterproto.message_field(2) 1008 | """Overlay objects.""" 1009 | 1010 | text: "Text" = betterproto.message_field(3) 1011 | """Text.""" 1012 | 1013 | deep_gleams: List["DeepGleamData"] = betterproto.message_field(4) 1014 | """Gleams.""" 1015 | 1016 | cluster_info: "LensOverlayClusterInfo" = betterproto.message_field(7) 1017 | """The cluster info.""" 1018 | 1019 | 1020 | @dataclass(eq=False, repr=False) 1021 | class LensOverlayInteractionRequest(betterproto.Message): 1022 | request_context: "LensOverlayRequestContext" = betterproto.message_field(1) 1023 | """Basic information and context for the request.""" 1024 | 1025 | interaction_request_metadata: "LensOverlayInteractionRequestMetadata" = ( 1026 | betterproto.message_field(2) 1027 | ) 1028 | """Metadata associated with an interaction request.""" 1029 | 1030 | image_crop: "ImageCrop" = betterproto.message_field(3) 1031 | """The image crop data.""" 1032 | 1033 | 1034 | @dataclass(eq=False, repr=False) 1035 | class LensOverlayInteractionResponse(betterproto.Message): 1036 | encoded_response: str = betterproto.string_field(3) 1037 | 1038 | 1039 | @dataclass(eq=False, repr=False) 1040 | class Payload(betterproto.Message): 1041 | """Next ID: 9""" 1042 | 1043 | request_type: "PayloadRequestType" = betterproto.enum_field(6) 1044 | """Optional. The type of the request.""" 1045 | 1046 | image_data: "ImageData" = betterproto.message_field(2) 1047 | """ 1048 | Currently unset, use image_data in ObjectsRequest. 1049 | TODO(b/359638436): Move ObjectsRequest clients onto Payload.ImageData. 1050 | """ 1051 | 1052 | content_data: bytes = betterproto.bytes_field(3) 1053 | """ 1054 | Data for non-image payloads. May be sent with or without an image in the 1055 | image_data field. If content_data is set, content_type must also be set. 1056 | """ 1057 | 1058 | content_type: str = betterproto.string_field(4) 1059 | """ 1060 | The media type/MIME type of the data represented i content_data, e.g. 1061 | "application/pdf". If content_type is set, content_data should also be set. 1062 | """ 1063 | 1064 | page_url: str = betterproto.string_field(5) 1065 | """The page url this request was made on.""" 1066 | 1067 | partial_pdf_document: "LensOverlayDocument" = betterproto.message_field(7) 1068 | """ 1069 | The partially parsed PDF document. Used to get early suggest signals. This 1070 | is only set for REQUEST_TYPE_EARLY_PARTIAL_PDF. 1071 | """ 1072 | 1073 | compression_type: "PayloadCompressionType" = betterproto.enum_field(8) 1074 | """ 1075 | Compression format of content_data. Currently only used for PDF data. 1076 | """ 1077 | 1078 | 1079 | @dataclass(eq=False, repr=False) 1080 | class LensOverlayServerClusterInfoRequest(betterproto.Message): 1081 | """The cluster info request for a Lens Overlay session.""" 1082 | 1083 | enable_search_session_id: bool = betterproto.bool_field(1) 1084 | """ 1085 | Whether to return a search session id alongside the server session id. 
1086 | """ 1087 | 1088 | 1089 | @dataclass(eq=False, repr=False) 1090 | class LensOverlayServerClusterInfoResponse(betterproto.Message): 1091 | server_session_id: str = betterproto.string_field(1) 1092 | """ID for subsequent server requests.""" 1093 | 1094 | search_session_id: str = betterproto.string_field(2) 1095 | """ID for subsequent search requests.""" 1096 | 1097 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(3) 1098 | """The routing info for the server session.""" 1099 | 1100 | 1101 | @dataclass(eq=False, repr=False) 1102 | class LensOverlayServerError(betterproto.Message): 1103 | """ 1104 | An error encountered while handling a request. 1105 | Next ID: 2 1106 | """ 1107 | 1108 | error_type: "LensOverlayServerErrorErrorType" = betterproto.enum_field(1) 1109 | """The error type.""" 1110 | 1111 | 1112 | @dataclass(eq=False, repr=False) 1113 | class LensOverlayServerRequest(betterproto.Message): 1114 | """Next ID: 4""" 1115 | 1116 | objects_request: "LensOverlayObjectsRequest" = betterproto.message_field(1) 1117 | """Options for fetching objects.""" 1118 | 1119 | interaction_request: "LensOverlayInteractionRequest" = betterproto.message_field(2) 1120 | """Options for fetching interactions.""" 1121 | 1122 | client_logs: "LensOverlayClientLogs" = betterproto.message_field(3) 1123 | """Client logs for the request.""" 1124 | 1125 | 1126 | @dataclass(eq=False, repr=False) 1127 | class LensOverlayServerResponse(betterproto.Message): 1128 | """ 1129 | Response details for an LensOverlay request. 1130 | Next ID: 4 1131 | """ 1132 | 1133 | error: "LensOverlayServerError" = betterproto.message_field(1) 1134 | """The encountered error.""" 1135 | 1136 | objects_response: "LensOverlayObjectsResponse" = betterproto.message_field(2) 1137 | """The objects response.""" 1138 | 1139 | interaction_response: "LensOverlayInteractionResponse" = betterproto.message_field( 1140 | 3 1141 | ) 1142 | """The interaction response.""" 1143 | 1144 | 1145 | @dataclass(eq=False, repr=False) 1146 | class StickinessSignals(betterproto.Message): 1147 | id_namespace: "StickinessSignalsNamespace" = betterproto.enum_field(1) 1148 | interpretation: "MessageSet" = betterproto.message_field(28) 1149 | education_input_extension: "EducationInputExtension" = betterproto.message_field( 1150 | 121 1151 | ) 1152 | 1153 | 1154 | @dataclass(eq=False, repr=False) 1155 | class EducationInputExtension(betterproto.Message): 1156 | math_solver_query: "MathSolverQuery" = betterproto.message_field(1) 1157 | 1158 | 1159 | @dataclass(eq=False, repr=False) 1160 | class LensOverlayVideoContextInputParams(betterproto.Message): 1161 | url: str = betterproto.string_field(1) 1162 | """Url of the video.""" 1163 | 1164 | 1165 | @dataclass(eq=False, repr=False) 1166 | class LensOverlayVideoParams(betterproto.Message): 1167 | video_context_input_params: "LensOverlayVideoContextInputParams" = ( 1168 | betterproto.message_field(1) 1169 | ) 1170 | """Video context params from input.""" 1171 | 1172 | 1173 | @dataclass(eq=False, repr=False) 1174 | class LensOverlayVisualSearchInteractionLogData(betterproto.Message): 1175 | """Log data for a Lens Overlay visual search interaction.""" 1176 | 1177 | filter_data: "FilterData" = betterproto.message_field(1) 1178 | """Filter related metadata.""" 1179 | 1180 | user_selection_data: "UserSelectionData" = betterproto.message_field(2) 1181 | """User Selection metadata.""" 1182 | 1183 | is_parent_query: bool = betterproto.bool_field(3) 1184 | """Whether the query is a parent query.""" 1185 | 1186 | 
client_platform: "ClientPlatform" = betterproto.enum_field(4) 1187 | """The client platform this query originated from.""" 1188 | 1189 | 1190 | @dataclass(eq=False, repr=False) 1191 | class FilterData(betterproto.Message): 1192 | """ 1193 | Filter data. 1194 | Next ID: 2 1195 | """ 1196 | 1197 | filter_type: "LensOverlayFilterType" = betterproto.enum_field(1) 1198 | """ 1199 | The filter type associated with this interaction (auto, translate, etc.). 1200 | """ 1201 | 1202 | 1203 | @dataclass(eq=False, repr=False) 1204 | class UserSelectionData(betterproto.Message): 1205 | """ 1206 | User selection data. 1207 | Next ID: 2 1208 | """ 1209 | 1210 | selection_type: "LensOverlaySelectionType" = betterproto.enum_field(1) 1211 | """ 1212 | The selection type associated with this interaction (e.g. region search). 1213 | """ 1214 | 1215 | 1216 | @dataclass(eq=False, repr=False) 1217 | class LensOverlayVisualSearchInteractionData(betterproto.Message): 1218 | """Metadata associated with a Lens Visual Search request.""" 1219 | 1220 | interaction_type: "LensOverlayInteractionRequestMetadataType" = ( 1221 | betterproto.enum_field(1) 1222 | ) 1223 | """The type of interaction.""" 1224 | 1225 | zoomed_crop: "ZoomedCrop" = betterproto.message_field(7) 1226 | """The selected region for this interaction, instead of the object id.""" 1227 | 1228 | object_id: str = betterproto.string_field(3) 1229 | """ 1230 | The selected object id for this interaction, instead of the zoomed crop. 1231 | Currently unsupported and should not be populated. 1232 | """ 1233 | 1234 | log_data: "LensOverlayVisualSearchInteractionLogData" = betterproto.message_field(5) 1235 | """Logging-specific data.""" 1236 | --------------------------------------------------------------------------------
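
The sketch below is illustrative only and not part of the generated file or the repository: it shows how the messages defined in lens_betterproto.py nest into a single serialized LensOverlayServerRequest. Every concrete value is an assumption for demonstration; the library's actual request assembly presumably lives in core/protobuf_builder.py and core/request_handler.py.

# Illustrative sketch: assumed usage of the generated betterproto messages.
import os

from chrome_lens_py.utils.lens_betterproto import (
    ImageData,
    ImageMetadata,
    ImagePayload,
    LensOverlayObjectsRequest,
    LensOverlayRequestContext,
    LensOverlayRequestId,
    LensOverlayServerRequest,
)

request = LensOverlayServerRequest(
    objects_request=LensOverlayObjectsRequest(
        request_context=LensOverlayRequestContext(
            request_id=LensOverlayRequestId(
                uuid=int.from_bytes(os.urandom(8), "big"),  # random id shared by a request sequence
                sequence_id=1,        # first request with this uuid
                image_sequence_id=1,  # first image payload with this uuid
            ),
        ),
        image_data=ImageData(
            payload=ImagePayload(image_bytes=b"<image bytes>"),  # placeholder, not real data
            # Per the ImageMetadata docstrings, width/height must reflect
            # the actual pixel size of image_bytes.
            image_metadata=ImageMetadata(width=1500, height=1000),
        ),
    ),
)

wire_bytes = bytes(request)  # betterproto messages serialize via bytes(...)

The resulting wire_bytes are what a client would send as an application/x-protobuf request body; the reply can be decoded with betterproto's parse method, e.g. LensOverlayServerResponse().parse(response_bytes).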