├── src
│   └── chrome_lens_py
│       ├── cli
│       │   ├── __init__.py
│       │   └── main.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── protobuf_builder.py
│       │   ├── request_handler.py
│       │   └── image_processor.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── sharex.py
│       │   ├── general.py
│       │   ├── font_manager.py
│       │   ├── config_manager.py
│       │   └── lens_betterproto.py
│       ├── __init__.py
│       ├── constants.py
│       ├── exceptions.py
│       └── api.py
├── setup.py
├── experiments
│   ├── exp.md
│   ├── reverse.py
│   └── test.py
├── requirements.txt
├── LICENSE
├── setup.cfg
├── .gitignore
├── docs
│   └── sharex.md
├── .github
│   └── workflows
│       └── python-publish.yml
├── README.md
└── README_RU.md
/src/chrome_lens_py/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /experiments/exp.md: -------------------------------------------------------------------------------- 1 | This folder is just for experimentation: testing old and new methods, stupid ideas, etc. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | Pillow 3 | filetype 4 | json5 5 | PySocks 6 | httpx[socks,http2] 7 | numpy 8 | rich 9 | betterproto >= 2.0.0b6 -------------------------------------------------------------------------------- /src/chrome_lens_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import LensAPI 2 | from .exceptions import LensAPIError, LensException, LensImageError, LensProtobufError 3 | 4 | __all__ = [ 5 | "LensAPI", 6 | "LensException", 7 | "LensAPIError", 8 | "LensImageError", 9 | "LensProtobufError", 10 | ] 11 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/sharex.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import platform 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def copy_to_clipboard(text: str) -> bool: 8 | """Copies the provided text to the clipboard.""" 9 | system = platform.system() 10 | try: 11 | import pyperclip # type: ignore 12 | 13 | pyperclip.copy(text) 14 | logger.info("Text copied to clipboard.") 15 | return True 16 | except ImportError: 17 | logger.error( 18 | "Module 'pyperclip' not found. Please install it to use clipboard functionality (pip install 'chrome-lens-py[clipboard]')."
19 | ) 20 | if system == "Linux": 21 | logger.info( 22 | "On Linux, you might also need to install xclip or xsel: sudo apt-get install xclip (or xsel)" 23 | ) 24 | return False 25 | except Exception as e: 26 | logger.error(f"Failed to copy text to clipboard: {e}") 27 | return False 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Bropines 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/chrome_lens_py/constants.py: -------------------------------------------------------------------------------- 1 | # API 2 | LENS_CRUPLOAD_ENDPOINT = "https://lensfrontend-pa.googleapis.com/v1/crupload" 3 | DEFAULT_API_KEY = "AIzaSyDr2UxVnv_U85AbhhY8XSHSIavUW0DC-sY" 4 | # https://github.com/AuroraWright/owocr 5 | 6 | 7 | # headers 8 | DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" 9 | DEFAULT_HEADERS = { 10 | "Content-Type": "application/x-protobuf", 11 | "X-Goog-Api-Key": DEFAULT_API_KEY, 12 | "User-Agent": DEFAULT_USER_AGENT, 13 | "Accept-Encoding": "gzip, deflate, br", 14 | "Accept": "*/*", 15 | } 16 | 17 | # img types 18 | SUPPORTED_MIMES_FOR_PREPARE = [ 19 | "image/jpeg", 20 | "image/png", 21 | "image/webp", 22 | "image/bmp", 23 | "image/gif", 24 | "image/tiff", 25 | ] 26 | DEFAULT_IMAGE_MAX_DIMENSION = 1500 27 | 28 | # region and time zone 29 | DEFAULT_CLIENT_REGION = "US" 30 | DEFAULT_CLIENT_TIME_ZONE = "America/New_York" 31 | DEFAULT_OCR_LANG = "" 32 | 33 | # Fonts 34 | DEFAULT_FONT_SIZE_OVERLAY = 20 35 | DEFAULT_FONT_PATH_WINDOWS = "arial.ttf" 36 | DEFAULT_FONT_PATH_LINUX = "DejaVuSans.ttf" 37 | DEFAULT_FONT_PATH_MACOS = "Arial.ttf" 38 | 39 | # Configuration 40 | APP_NAME_FOR_CONFIG = "chrome-lens-py" 41 | DEFAULT_CONFIG_FILENAME = "config.json" 42 | -------------------------------------------------------------------------------- /src/chrome_lens_py/exceptions.py: -------------------------------------------------------------------------------- 1 | class LensException(Exception): 2 | """Base class for exceptions of this library.""" 3 | 4 | pass 5 | 6 | 7 | class LensAPIError(LensException): 8 | """Exception for errors related to HTTP requests to the Lens API.""" 9 | 10 | def __init__(self, message, status_code=None, 
response_body=None): 11 | super().__init__(message) 12 | self.status_code = status_code 13 | self.response_body = response_body 14 | 15 | def __str__(self): 16 | msg = super().__str__() 17 | if self.status_code: 18 | msg += f" (Status Code: {self.status_code})" 19 | if self.response_body: 20 | response_body_str = str(self.response_body) 21 | if len(response_body_str) > 200: 22 | response_body_str = response_body_str[:200] + "..." 23 | msg += f"\nResponse Body (partial): {response_body_str}" 24 | return msg 25 | 26 | 27 | class LensImageError(LensException): 28 | """Exception for errors related to image processing.""" 29 | 30 | pass 31 | 32 | 33 | class LensProtobufError(LensException): 34 | """Exception for errors related to the creation or parsing of Protobuf messages.""" 35 | 36 | pass 37 | 38 | 39 | class LensFontError(LensException): 40 | """Exception for font-related errors.""" 41 | 42 | pass 43 | 44 | 45 | class LensConfigError(LensException): 46 | """Exception for configuration-related errors.""" 47 | 48 | pass 49 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = chrome_lens_py 3 | version = 3.3.1 4 | author = Bropines 5 | author_email = bropines@gmail.com 6 | description = Python library for Google Lens OCR and Translation using the crupload endpoint. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/bropines/chrome-lens-py 10 | license = MIT License 11 | classifiers = 12 | Programming Language :: Python :: 3 13 | Programming Language :: Python :: 3.8 14 | Programming Language :: Python :: 3.9 15 | Programming Language :: Python :: 3.10 16 | Programming Language :: Python :: 3.11 17 | Programming Language :: Python :: 3.12 18 | Operating System :: OS Independent 19 | Topic :: Scientific/Engineering :: Image Recognition 20 | Topic :: Software Development :: Libraries :: Python Modules 21 | Intended Audience :: Developers 22 | 23 | [options] 24 | package_dir = 25 | = src 26 | packages = find: 27 | python_requires = >=3.8 28 | install_requires = 29 | httpx[socks,http2] >= 0.20 30 | Pillow >= 9.0 31 | betterproto >= 2.0.0b6 32 | filetype >= 1.0 33 | rich >= 10.0 34 | numpy 35 | 36 | [options.packages.find] 37 | where = src 38 | exclude = 39 | tests* 40 | 41 | [options.entry_points] 42 | console_scripts = 43 | lens_scan = chrome_lens_py.cli.main:run 44 | 45 | [options.extras_require] 46 | dev = 47 | black 48 | isort 49 | flake8 50 | mypy 51 | pytest 52 | pytest-asyncio 53 | clipboard = 54 | pyperclip>=1.8 55 | 56 | [isort] 57 | profile = black 58 | 59 | [flake8] 60 | max-line-length = 140 61 | extend-ignore = 62 | E203, 63 | W503, 64 | F401 65 | exclude = 66 | .git, 67 | __pycache__, 68 | build, 69 | dist, 70 | venv, 71 | .venv, 72 | lens_betterproto.py -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/general.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | import filetype # type: ignore 6 | 7 | from ..constants import SUPPORTED_MIMES_FOR_PREPARE 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def is_url(string: str) -> bool: 13 | """Checks whether the string is a valid URL.""" 14 | try: 15 | result = urlparse(string) 16 | return all([result.scheme, result.netloc]) 17 | except (ValueError,
AttributeError): 18 | return False 19 | 20 | 21 | def is_image_file_supported(path_or_url: str) -> bool: 22 | """ 23 | Checks if the string is a URL or a supported image file. 24 | Used in the CLI for quick validation before passing to the API. 25 | """ 26 | if is_url(path_or_url): 27 | logger.debug( 28 | f"'{path_or_url}' is a URL, assuming it's a valid image source for the API." 29 | ) 30 | return True 31 | 32 | if not os.path.isfile(path_or_url): 33 | return False 34 | 35 | try: 36 | kind = filetype.guess(path_or_url) 37 | if kind and kind.mime in SUPPORTED_MIMES_FOR_PREPARE: 38 | return True 39 | 40 | ext = os.path.splitext(path_or_url)[1].lower() 41 | pillow_common_exts = [ 42 | ".png", 43 | ".jpg", 44 | ".jpeg", 45 | ".gif", 46 | ".bmp", 47 | ".webp", 48 | ".tif", 49 | ".tiff", 50 | ] 51 | if ext in pillow_common_exts: 52 | logger.debug( 53 | f"File '{path_or_url}' has a common Pillow extension '{ext}', assuming supported." 54 | ) 55 | return True 56 | 57 | except Exception as e: 58 | logger.warning(f"Could not guess file type for '{path_or_url}': {e}") 59 | return True 60 | 61 | return False 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.pkl 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | older/ 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | response_debug.txt 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | Pipfile.lock 90 | 91 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 92 | __pypackages__/ 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | .dmypy.json 109 | dmypy.json 110 | 111 | # Pyre type checker 112 | .pyre/ 113 | 114 | # Editor / IDE specific 115 | .vscode/ 116 | .idea/ 117 | *.sublime-project 118 | *.sublime-workspace 119 | 120 | # egg-info and related files 121 | *.egg-info/ 122 | src/chrome_lens_py.egg-info/ 123 | 124 | # mylist 125 | tree.txt 126 | test.py 127 | -------------------------------------------------------------------------------- /docs/sharex.md: -------------------------------------------------------------------------------- 1 | ## Custom ShareX OCR with Google Lens 2 | 3 | It's possible to use the `chrome-lens-py` package with ShareX to OCR images using the Google Lens API, providing a significant upgrade over the default OCR in ShareX. Here's how to set it up: 4 | 5 | 0. Get [ShareX](https://getsharex.com/) if you don't have it already. 6 | 1. Install Python 3.10+ from the [Python Official website](https://www.python.org/downloads/) or via [Pyenv-WIN](https://github.com/pyenv-win/pyenv-win). 7 | **IMPORTANT:** During installation, you **must** check the "Add Python to PATH" option, otherwise this will not work. 8 | 9 | 2. Install the `chrome-lens-py` library with clipboard support: 10 | ```bash 11 | pip install "chrome-lens-py[clipboard]" 12 | ``` 13 | 3. Find the path to the installed `lens_scan` executable. Run the following command in PowerShell: 14 | ```powershell 15 | (Get-Command lens_scan).Source 16 | ``` 17 | You will get a path similar to this: 18 | ``` 19 | C:\Users\bropi\.pyenv\pyenv-win\shims\lens_scan.bat 20 | ``` 21 | 22 | Copy this path for the next steps. 23 | 24 | 4. Open the ShareX main window and navigate to `Hotkey settings...`. Create a new hotkey. For the task, select `Screen capture` -> `Capture region (Light)`. 25 | 26 | 5. Now, open the settings for that new hotkey (the gear icon). 27 | - Under the **Tasks** tab, ensure `Capture region (Light)` is selected. 28 | - Go to the **Actions** tab and check the `Override actions` box. 29 | - Click **Add...** and set up a new action with the following details: 30 | 31 | ![Screenshot of ShareX Action settings](https://github.com/user-attachments/assets/38ac5d3c-0119-496a-92ab-02a63dd2152c) 32 | 33 | - **Name:** `Lens OCR` (or any name you prefer) 34 | - **File path:** Paste the path you copied in step 3. For example: 35 | - `C:\Users\bropi\.pyenv\pyenv-win\shims\lens_scan.bat` 36 | - **Arguments:** Enter `"$input" --sharex` 37 | - Uncheck `Hidden window` if you need to troubleshoot later. Otherwise, leaving it checked is fine. 38 | 39 | 6. Save the action. Back in the Hotkey settings, make sure your new `Lens OCR` action is checked in the list. 40 | 41 | 7. You can now close the settings windows. Use your new hotkey to capture a region of your screen. The image will be processed, and the recognized text will be automatically copied to your clipboard. 42 | 43 | ![GIF demonstrating the OCR process](https://lune.dimden.dev/1bf28abae5b0.gif) 44 | 45 | ## Troubleshooting 46 | If it takes a long time to process the image and nothing gets copied to your clipboard, an error might be occurring in the script. To see the error, go back to your `Lens OCR` Action settings (step 5), uncheck the **"Hidden window"** option, and run the hotkey again. A console window will appear showing any error messages. 
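You can also run the same command ShareX would run directly in a terminal and read any error output there. A small sketch; the screenshot path below is just a placeholder:

```powershell
# Run lens_scan manually on any saved image; errors are printed to the console
lens_scan "C:\path\to\screenshot.png" --sharex
```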
47 | 48 | ## Updating 49 | To update the package to the latest version, simply run the following command in your terminal: 50 | ```bash 51 | pip install --upgrade "chrome-lens-py[clipboard]" 52 | ``` 53 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | push: 13 | paths: 14 | - 'src/**' 15 | - 'requirements.txt' 16 | 17 | pull_request: 18 | paths: 19 | - 'src/**' 20 | - 'requirements.txt' 21 | workflow_dispatch: 22 | release: 23 | types: [published] 24 | 25 | 26 | permissions: 27 | contents: read 28 | 29 | jobs: 30 | formatting: 31 | name: Checking formatting 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - name: Set up Python 37 | uses: actions/setup-python@v5.5.0 38 | with: 39 | python-version: '3.11' 40 | cache: 'pip' 41 | 42 | - name: Install dependencies 43 | run: pip install black isort 44 | 45 | - name: Check formatting with Black 46 | run: black --check src/ 47 | 48 | - name: Check imports sorting with isort 49 | run: isort --check-only src/ --profile black 50 | 51 | 52 | lint: 53 | name: Checking linting errors 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | 58 | - name: Set up Python 59 | uses: actions/setup-python@v5.5.0 60 | with: 61 | python-version: '3.11' 62 | cache: 'pip' 63 | 64 | - name: Install dependencies 65 | run: pip install flake8 66 | 67 | - name: Lint with flake8 68 | run: flake8 src/ 69 | 70 | security: 71 | name: Checking security 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | 76 | - uses: actions/setup-python@v5 77 | with: 78 | python-version: '3.11' 79 | cache: 'pip' 80 | 81 | - name: Install dependencies 82 | run: pip install bandit 83 | 84 | - name: Run bandit 85 | run: bandit -r src/ -lll 86 | 87 | build: 88 | name: Build packages 89 | runs-on: ubuntu-latest 90 | needs: [lint, security, formatting] 91 | steps: 92 | - uses: actions/checkout@v4 93 | 94 | - name: Set up Python 95 | uses: actions/setup-python@v5 96 | with: 97 | python-version: '3.8' 98 | cache: 'pip' 99 | 100 | - name: Install build 101 | run: pip install build 102 | 103 | - name: Build a binary wheel and a source tarball 104 | run: python3 -m build 105 | 106 | - name: Store the distribution packages 107 | uses: actions/upload-artifact@v4 108 | with: 109 | name: python-package-distributions 110 | path: dist/ 111 | 112 | publish-to-pypi: 113 | name: Publish Python distribution to PyPI 114 | if: startsWith(github.ref, 'refs/tags/') 115 | needs: 116 | - build 117 | runs-on: ubuntu-latest 118 | environment: 119 | name: pypi 120 | url: https://pypi.org/p/chrome_lens_py 121 | permissions: 122 | id-token: write 123 | steps: 124 | - name: Download all the dists 125 | uses: actions/download-artifact@v4 126 | with: 127 | name: python-package-distributions 128 | path: dist/ 129 | - name: Publish distribution to PyPI 130 | uses:
pypa/gh-action-pypi-publish@release/v1.12 131 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/font_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | from typing import Optional, Union 5 | 6 | from PIL import ImageFont 7 | 8 | from ..constants import ( 9 | DEFAULT_FONT_PATH_LINUX, 10 | DEFAULT_FONT_PATH_MACOS, 11 | DEFAULT_FONT_PATH_WINDOWS, 12 | DEFAULT_FONT_SIZE_OVERLAY, 13 | ) 14 | from ..exceptions import LensFontError 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | FontType = Union[ImageFont.FreeTypeFont, ImageFont.ImageFont] 19 | 20 | 21 | def get_default_system_font_path() -> Optional[str]: 22 | if sys.platform.startswith("win"): 23 | font_path = os.path.join( 24 | os.environ.get("SystemRoot", "C:\\Windows"), 25 | "Fonts", 26 | DEFAULT_FONT_PATH_WINDOWS, 27 | ) 28 | if os.path.exists(font_path): 29 | return font_path 30 | elif sys.platform == "darwin": 31 | potential_paths = [ 32 | f"/System/Library/Fonts/Supplemental/{DEFAULT_FONT_PATH_MACOS}", 33 | f"/Library/Fonts/{DEFAULT_FONT_PATH_MACOS}", 34 | DEFAULT_FONT_PATH_MACOS, 35 | ] 36 | for path in potential_paths: 37 | try: 38 | ImageFont.truetype(path, DEFAULT_FONT_SIZE_OVERLAY) 39 | return path 40 | except IOError: 41 | continue 42 | else: # Linux 43 | try: 44 | import subprocess 45 | 46 | result = subprocess.run( 47 | ["fc-match", "-f", "%{file}", DEFAULT_FONT_PATH_LINUX], 48 | capture_output=True, 49 | text=True, 50 | check=False, 51 | ) 52 | if result.returncode == 0 and result.stdout.strip(): 53 | return result.stdout.strip() 54 | except Exception as e: 55 | logger.debug(f"Could not find font via fc-match: {e}") 56 | return DEFAULT_FONT_PATH_LINUX 57 | 58 | logger.warning( 59 | "Could not automatically determine a default system font path. Please specify via config or --font." 60 | ) 61 | return None 62 | 63 | 64 | def get_font( 65 | font_path_override: Optional[str] = None, font_size_override: Optional[int] = None 66 | ) -> FontType: 67 | font_size = ( 68 | font_size_override 69 | if font_size_override is not None 70 | else DEFAULT_FONT_SIZE_OVERLAY 71 | ) 72 | font_path = font_path_override 73 | 74 | if not font_path: 75 | font_path = get_default_system_font_path() 76 | if font_path: 77 | logger.debug(f"Using system default font: {font_path}") 78 | else: 79 | logger.warning( 80 | "No font path specified and system default not found. Pillow will use its built-in default font." 81 | ) 82 | try: 83 | return ImageFont.load_default() 84 | except Exception as e: 85 | logger.error(f"Error loading Pillow's default font: {e}") 86 | raise LensFontError(f"Error loading Pillow's default font: {e}") 87 | 88 | if not font_path: 89 | logger.error("Font path is not defined. Cannot load font.") 90 | raise LensFontError("The path to the font is not defined.") 91 | 92 | try: 93 | logger.debug(f"Attempting to load font: '{font_path}' with size {font_size}") 94 | return ImageFont.truetype(font_path, font_size) 95 | except IOError: 96 | logger.error( 97 | f"Font file not found or cannot be read: {font_path}. Pillow will try its default."
98 | ) 99 | try: 100 | return ImageFont.load_default() 101 | except Exception as e: 102 | logger.error( 103 | f"Critical: Could not load specified font '{font_path}' nor Pillow's default font: {e}" 104 | ) 105 | raise LensFontError( 106 | f"Failed to load the '{font_path}' font or the default Pillow font: {e}" 107 | ) 108 | except Exception as e: 109 | logger.error(f"Unexpected error loading font '{font_path}': {e}", exc_info=True) 110 | raise LensFontError(f"Unexpected error while loading font '{font_path}': {e}") 111 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/protobuf_builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | from typing import TYPE_CHECKING, Optional, Tuple 4 | 5 | from ..constants import ( 6 | DEFAULT_CLIENT_REGION, 7 | DEFAULT_CLIENT_TIME_ZONE, 8 | DEFAULT_OCR_LANG, 9 | ) 10 | from ..exceptions import LensProtobufError 11 | 12 | if TYPE_CHECKING: 13 | from ..utils.lens_betterproto import ( 14 | AppliedFilter, 15 | AppliedFilters, 16 | AppliedFilterTranslate, 17 | ImageData, 18 | ImageMetadata, 19 | ImagePayload, 20 | LensOverlayClientContext, 21 | LensOverlayClusterInfo, 22 | LensOverlayFilterType, 23 | LensOverlayObjectsRequest, 24 | LensOverlayRequestContext, 25 | LensOverlayRequestId, 26 | LensOverlayRoutingInfo, 27 | LensOverlayServerRequest, 28 | LocaleContext, 29 | Platform, 30 | Surface, 31 | ) 32 | else: 33 | from ..utils.lens_betterproto import ( 34 | AppliedFilter, 35 | AppliedFilters, 36 | AppliedFilterTranslate, 37 | ImageData, 38 | ImageMetadata, 39 | ImagePayload, 40 | LensOverlayClientContext, 41 | LensOverlayClusterInfo, 42 | LensOverlayFilterType, 43 | LensOverlayObjectsRequest, 44 | LensOverlayRequestContext, 45 | LensOverlayRequestId, 46 | LensOverlayRoutingInfo, 47 | LensOverlayServerRequest, 48 | LocaleContext, 49 | Platform, 50 | Surface, 51 | ) 52 | 53 | logger = logging.getLogger(__name__) 54 | 55 | 56 | def create_ocr_translate_request( 57 | image_bytes: bytes, 58 | width: int, 59 | height: int, 60 | ocr_language: str, 61 | target_translation_language: Optional[str] = None, 62 | source_translation_language: Optional[str] = None, 63 | client_region: Optional[str] = None, 64 | client_time_zone: Optional[str] = None, 65 | session_uuid: Optional[int] = None, 66 | sequence_id: int = 1, 67 | image_sequence_id: int = 1, 68 | routing_info: Optional["LensOverlayRoutingInfo"] = None, 69 | ) -> Tuple[bytes, int]: 70 | try: 71 | server_request = LensOverlayServerRequest() 72 | objects_request = LensOverlayObjectsRequest() 73 | request_context = LensOverlayRequestContext() 74 | 75 | uuid_to_use = ( 76 | session_uuid 77 | if session_uuid is not None 78 | else random.randint(0, (1 << 63) - 1) 79 | ) 80 | if session_uuid is None: 81 | logger.debug( 82 | f"ProtobufBuilder: No session_uuid provided, generated new one: {uuid_to_use}" 83 | ) 84 | else: 85 | logger.debug(f"ProtobufBuilder: Using provided session_uuid: {uuid_to_use}") 86 | 87 | request_id_obj = LensOverlayRequestId( 88 | uuid=uuid_to_use, 89 | sequence_id=sequence_id, 90 | image_sequence_id=image_sequence_id, 91 | ) 92 | if routing_info: 93 | request_id_obj.routing_info = routing_info 94 | request_context.request_id = request_id_obj 95 | 96 | effective_client_region = ( 97 | client_region if client_region is not None else DEFAULT_CLIENT_REGION 98 | ) 99 | effective_client_time_zone = ( 100 | client_time_zone 101 | if client_time_zone is not None 102 | else 
DEFAULT_CLIENT_TIME_ZONE 103 | ) 104 | 105 | locale_ctx = LocaleContext( 106 | language=ocr_language, 107 | region=effective_client_region, 108 | time_zone=effective_client_time_zone, 109 | ) 110 | client_ctx = LensOverlayClientContext( 111 | platform=Platform.WEB, surface=Surface.CHROMIUM, locale_context=locale_ctx 112 | ) 113 | 114 | if target_translation_language: 115 | translate_options = AppliedFilterTranslate( 116 | target_language=target_translation_language 117 | ) 118 | if source_translation_language: 119 | translate_options.source_language = source_translation_language 120 | 121 | applied_filter_translate = AppliedFilter( 122 | filter_type=LensOverlayFilterType.TRANSLATE, translate=translate_options 123 | ) 124 | client_ctx.client_filters = AppliedFilters( 125 | filter=[applied_filter_translate] 126 | ) 127 | 128 | request_context.client_context = client_ctx 129 | objects_request.request_context = request_context 130 | 131 | image_payload_obj = ImagePayload(image_bytes=image_bytes) 132 | image_metadata_obj = ImageMetadata(width=width, height=height) 133 | image_data_obj = ImageData( 134 | payload=image_payload_obj, image_metadata=image_metadata_obj 135 | ) 136 | objects_request.image_data = image_data_obj 137 | server_request.objects_request = objects_request 138 | 139 | protobuf_payload_bytes = bytes(server_request) 140 | logger.debug( 141 | "Protobuf request created. UUID: %s, SeqID: %s, ImgSeqID: %s, Size: %d bytes.", 142 | uuid_to_use, 143 | sequence_id, 144 | image_sequence_id, 145 | len(protobuf_payload_bytes), 146 | ) 147 | return protobuf_payload_bytes, uuid_to_use 148 | 149 | except TypeError as te: 150 | logger.error(f"TypeError during Protobuf request creation: {te}", exc_info=True) 151 | raise LensProtobufError( 152 | f"Type error when creating a Protobuf request: {te}" 153 | ) from te 154 | except Exception as e: 155 | logger.error(f"Error creating Protobuf request: {e}", exc_info=True) 156 | raise LensProtobufError(f"Error while creating Protobuf request: {e}") from e 157 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/config_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, Dict, Optional 5 | 6 | from ..constants import APP_NAME_FOR_CONFIG, DEFAULT_CONFIG_FILENAME 7 | from ..exceptions import LensConfigError 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def get_default_config_dir(app_name: str = APP_NAME_FOR_CONFIG) -> str: 13 | """Returns the default configuration directory path for the application.""" 14 | home_dir = os.path.expanduser("~") 15 | # This structure is a common convention 16 | config_dir_base = os.getenv("XDG_CONFIG_HOME", os.path.join(home_dir, ".config")) 17 | return os.path.join(config_dir_base, app_name) 18 | 19 | 20 | def load_config(config_file_path: str) -> Dict[str, Any]: 21 | """ 22 | Loads configuration from a JSON file. 23 | Returns an empty dictionary if the file is not found. 24 | Raises LensConfigError on parsing or I/O errors. 
25 | """ 26 | if os.path.isfile(config_file_path): 27 | try: 28 | with open(config_file_path, "r", encoding="utf-8") as f: 29 | return json.load(f) 30 | except json.JSONDecodeError as e: 31 | raise LensConfigError( 32 | f"Error decoding JSON from config file '{config_file_path}': {e}" 33 | ) 34 | except IOError as e: 35 | raise LensConfigError( 36 | f"I/O error reading config file '{config_file_path}': {e}" 37 | ) 38 | return {} 39 | 40 | 41 | def get_effective_config_value( 42 | cli_arg_value: Optional[Any], config_file_value: Optional[Any], default_value: Any 43 | ) -> Any: 44 | """Determines the effective configuration value. Priority: CLI > Config File > Default.""" 45 | if cli_arg_value is not None: 46 | return cli_arg_value 47 | if config_file_value is not None: 48 | return config_file_value 49 | return default_value 50 | 51 | 52 | def build_app_config( 53 | cli_args: Optional[Dict[str, Any]] = None, config_file_path: Optional[str] = None 54 | ) -> Dict[str, Any]: 55 | """ 56 | Builds the final application config by merging values from CLI args and a config file. 57 | """ 58 | cli = cli_args or {} 59 | loaded_config = load_config(config_file_path) if config_file_path else {} 60 | 61 | if loaded_config: 62 | logging.info("Applying settings from config file:") 63 | for key, value in loaded_config.items(): 64 | if key.lower() not in ["api_key", "proxy"]: 65 | logging.info(f" - {key}: {value}") 66 | 67 | # Priority: CLI > Config File > Default (handled by get_effective_config_value) 68 | # Defaults are defined in constants.py or as literals here. 69 | from ..constants import ( 70 | DEFAULT_API_KEY, 71 | DEFAULT_CLIENT_REGION, 72 | DEFAULT_CLIENT_TIME_ZONE, 73 | ) 74 | 75 | final_config = { 76 | "api_key": get_effective_config_value( 77 | cli.get("api_key"), loaded_config.get("api_key"), DEFAULT_API_KEY 78 | ), 79 | "client_region": get_effective_config_value( 80 | cli.get("client_region"), 81 | loaded_config.get("client_region"), 82 | DEFAULT_CLIENT_REGION, 83 | ), 84 | "client_time_zone": get_effective_config_value( 85 | cli.get("client_time_zone"), 86 | loaded_config.get("client_time_zone"), 87 | DEFAULT_CLIENT_TIME_ZONE, 88 | ), 89 | "proxy": get_effective_config_value( 90 | cli.get("proxy"), loaded_config.get("proxy"), None 91 | ), 92 | "timeout": int( 93 | get_effective_config_value( 94 | cli.get("timeout"), loaded_config.get("timeout"), 60 95 | ) 96 | ), 97 | "font_path": get_effective_config_value( 98 | cli.get("font_path"), loaded_config.get("font_path"), None 99 | ), 100 | "font_size": ( 101 | int( 102 | get_effective_config_value( 103 | cli.get("font_size"), loaded_config.get("font_size"), 20 104 | ) 105 | ) 106 | if get_effective_config_value( 107 | cli.get("font_size"), loaded_config.get("font_size"), None 108 | ) 109 | is not None 110 | else None 111 | ), 112 | "logging_level": get_effective_config_value( 113 | cli.get("logging_level"), loaded_config.get("logging_level"), "WARNING" 114 | ).upper(), 115 | "ocr_preserve_line_breaks": get_effective_config_value( 116 | cli.get("ocr_preserve_line_breaks"), 117 | loaded_config.get("ocr_preserve_line_breaks"), 118 | True, 119 | ), 120 | } 121 | return final_config 122 | 123 | 124 | def update_config_file_from_cli(cli_args: Dict[str, Any], config_file_path: str): 125 | """Updates the config file with values from CLI args (only safe fields).""" 126 | current_config = load_config(config_file_path) 127 | 128 | fields_to_update = [ 129 | "client_region", 130 | "client_time_zone", 131 | "proxy", 132 | "timeout", 133 | "font_path", 134 | 
"font_size", 135 | "logging_level", 136 | "ocr_preserve_line_breaks", 137 | ] 138 | updated = False 139 | for field in fields_to_update: 140 | cli_value = cli_args.get(field) 141 | if cli_value is not None and current_config.get(field) != cli_value: 142 | current_config[field] = cli_value 143 | updated = True 144 | 145 | if not updated: 146 | logging.info("No configuration changes to save from CLI arguments.") 147 | return 148 | 149 | config_dir = os.path.dirname(config_file_path) 150 | try: 151 | if not os.path.exists(config_dir): 152 | os.makedirs(config_dir) 153 | with open(config_file_path, "w", encoding="utf-8") as f: 154 | json.dump(current_config, f, indent=4, ensure_ascii=False) 155 | logging.info(f"Configuration file updated: {config_file_path}") 156 | except (IOError, TypeError) as e: 157 | raise LensConfigError(f"Error saving config file '{config_file_path}': {e}") 158 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/request_handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union 3 | 4 | import httpx 5 | 6 | from ..constants import DEFAULT_HEADERS, LENS_CRUPLOAD_ENDPOINT 7 | from ..exceptions import LensAPIError, LensProtobufError 8 | 9 | if TYPE_CHECKING: 10 | from ..utils.lens_betterproto import ( 11 | LensOverlayClusterInfo, 12 | LensOverlayRoutingInfo, 13 | LensOverlayServerResponse, 14 | ) 15 | else: 16 | from ..utils.lens_betterproto import ( 17 | LensOverlayClusterInfo, 18 | LensOverlayRoutingInfo, 19 | LensOverlayServerResponse, 20 | ) 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class LensRequestHandler: 26 | def __init__( 27 | self, 28 | api_key: str, 29 | proxy: Optional[Union[str, Dict[str, httpx.AsyncBaseTransport]]] = None, 30 | timeout: int = 60, 31 | ): 32 | self.api_key = api_key 33 | self.proxy_settings: Dict[str, Any] = {} 34 | self.timeout = timeout 35 | 36 | if proxy: 37 | if isinstance(proxy, str): 38 | self.proxy_settings["proxy"] = proxy 39 | logger.info(f"Using single proxy URL: {proxy}") 40 | elif isinstance(proxy, dict): 41 | self.proxy_settings["mounts"] = proxy 42 | logger.info(f"Using proxy mounts configuration: {proxy}") 43 | else: 44 | logger.warning( 45 | f"Invalid proxy type: {type(proxy)}. Proxy will not be used." 
46 | ) 47 | 48 | self.current_session_uuid: Optional[int] = None 49 | self.current_sequence_id: int = 0 50 | self.current_image_sequence_id: int = 0 51 | self.last_cluster_info: Optional["LensOverlayClusterInfo"] = None 52 | 53 | def _get_headers(self) -> dict: 54 | headers = DEFAULT_HEADERS.copy() 55 | headers["X-Goog-Api-Key"] = self.api_key 56 | return headers 57 | 58 | def start_new_session(self): 59 | self.current_session_uuid = None 60 | self.current_sequence_id = 0 61 | self.current_image_sequence_id = 0 62 | self.last_cluster_info = None 63 | logger.info("LensRequestHandler: New session initiated (state reset).") 64 | 65 | def get_next_sequence_ids_for_request( 66 | self, is_new_image_payload: bool 67 | ) -> Tuple[Optional[int], int, int]: 68 | self.current_sequence_id += 1 69 | if is_new_image_payload: 70 | self.current_image_sequence_id += 1 71 | 72 | logger.debug( 73 | f"RequestHandler: Providing IDs for request: " 74 | f"SessionUUID (current): {self.current_session_uuid}, " 75 | f"Next SeqID: {self.current_sequence_id}, " 76 | f"Next ImgSeqID: {self.current_image_sequence_id} (is_new_image: {is_new_image_payload})" 77 | ) 78 | return ( 79 | self.current_session_uuid, 80 | self.current_sequence_id, 81 | self.current_image_sequence_id, 82 | ) 83 | 84 | async def send_request( 85 | self, protobuf_payload: bytes, request_uuid_used: int 86 | ) -> "LensOverlayServerResponse": 87 | headers = self._get_headers() 88 | 89 | if self.current_session_uuid is None: 90 | self.current_session_uuid = request_uuid_used 91 | logger.info( 92 | f"RequestHandler: Session UUID initialized by this request: {self.current_session_uuid}" 93 | ) 94 | 95 | logger.info( 96 | "Sending request to %s (UUID: %s, SeqID: %s) with payload size: %d bytes.", 97 | LENS_CRUPLOAD_ENDPOINT, 98 | self.current_session_uuid, 99 | self.current_sequence_id, 100 | len(protobuf_payload), 101 | ) 102 | 103 | response_bytes = b"" 104 | async with httpx.AsyncClient(**self.proxy_settings, http2=True) as client: 105 | try: 106 | response = await client.post( 107 | LENS_CRUPLOAD_ENDPOINT, 108 | content=protobuf_payload, 109 | headers=headers, 110 | timeout=self.timeout, 111 | ) 112 | logger.debug(f"Response status: {response.status_code}") 113 | response_bytes = await response.aread() 114 | response.raise_for_status() 115 | 116 | logger.debug(f"Response content length: {len(response_bytes)} bytes.") 117 | 118 | server_response_proto = LensOverlayServerResponse().parse( 119 | response_bytes 120 | ) 121 | 122 | if ( 123 | server_response_proto.error 124 | and server_response_proto.error.error_type != 0 125 | ): 126 | error_msg = f"Lens API server error. Type: {server_response_proto.error.error_type}" 127 | logger.error(error_msg) 128 | raise LensAPIError( 129 | error_msg, 130 | status_code=response.status_code, 131 | response_body=response_bytes.decode(errors="replace"), 132 | ) 133 | 134 | if ( 135 | server_response_proto.objects_response 136 | and server_response_proto.objects_response.cluster_info 137 | ): 138 | self.last_cluster_info = ( 139 | server_response_proto.objects_response.cluster_info 140 | ) 141 | if ( 142 | self.last_cluster_info 143 | and self.last_cluster_info.server_session_id 144 | ): 145 | logger.debug( 146 | f"RequestHandler: Updated last_cluster_info. 
ServerSessionID: {self.last_cluster_info.server_session_id}, " 147 | f"RoutingInfo available: {bool(self.last_cluster_info.routing_info)}" 148 | ) 149 | else: 150 | self.last_cluster_info = None 151 | logger.debug( 152 | "RequestHandler: No cluster_info in response or no objects_response." 153 | ) 154 | 155 | return server_response_proto 156 | 157 | except httpx.HTTPStatusError as e_http: 158 | response_text_content = e_http.response.text 159 | logger.error( 160 | f"HTTP error: {e_http.response.status_code} - {response_text_content[:500]}", 161 | exc_info=True, 162 | ) 163 | raise LensAPIError( 164 | f"HTTP error: {e_http.response.status_code}", 165 | status_code=e_http.response.status_code, 166 | response_body=response_text_content, 167 | ) from e_http 168 | except httpx.RequestError as e_req: 169 | logger.error( 170 | f"Request error (possibly proxy-related): {e_req}", exc_info=True 171 | ) 172 | raise LensAPIError( 173 | f"Network or request error (possibly proxy-related): {e_req}" 174 | ) from e_req 175 | except (LensProtobufError, ValueError) as e_parse: 176 | logger.error( 177 | f"Error parsing Protobuf response: {e_parse}", exc_info=True 178 | ) 179 | try: 180 | decoded_for_error = response_bytes.decode(errors="replace") 181 | except AttributeError: 182 | decoded_for_error = str(response_bytes) 183 | raise LensProtobufError( 184 | f"Protobuf response parsing error: {e_parse}. " 185 | f"Response body (partial): {decoded_for_error[:200]}" 186 | ) from e_parse 187 | except Exception as e_gen: 188 | logger.error(f"Unexpected error during request: {e_gen}", exc_info=True) 189 | raise LensAPIError( 190 | f"Unexpected error while executing the request: {e_gen}" 191 | ) from e_gen 192 | -------------------------------------------------------------------------------- /src/chrome_lens_py/core/image_processor.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | import math 4 | from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple 5 | 6 | import httpx 7 | import numpy as np 8 | from PIL import Image, ImageDraw, ImageFile, ImageFont 9 | 10 | from ..constants import DEFAULT_IMAGE_MAX_DIMENSION 11 | from ..exceptions import LensImageError 12 | from ..utils.font_manager import FontType 13 | from ..utils.general import is_url 14 | 15 | if TYPE_CHECKING: 16 | from ..utils.lens_betterproto import CenterRotatedBox, CoordinateType 17 | else: 18 | from ..utils.lens_betterproto import CenterRotatedBox, CoordinateType 19 | 20 | ImageFile.LOAD_TRUNCATED_IMAGES = True 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | async def _get_pil_from_source(image_source: Any) -> Image.Image: 25 | """ 26 | Takes any supported source and returns a PIL.Image object. 27 | Raises LensImageError if the source is unsupported or an error occurs.
28 | """ 29 | if isinstance(image_source, Image.Image): 30 | logger.debug("Processing PIL.Image object source.") 31 | return image_source.copy() 32 | 33 | if isinstance(image_source, str): 34 | if is_url(image_source): 35 | logger.debug(f"Processing URL source: {image_source}") 36 | try: 37 | async with httpx.AsyncClient(timeout=30) as client: 38 | response = await client.get(image_source, follow_redirects=True) 39 | response.raise_for_status() 40 | return Image.open(io.BytesIO(response.content)) 41 | except httpx.RequestError as e: 42 | raise LensImageError( 43 | f"Network error downloading URL '{image_source}': {e}" 44 | ) from e 45 | except Exception as e: 46 | raise LensImageError( 47 | f"Error processing URL '{image_source}': {e}" 48 | ) from e 49 | else: # It's a file path 50 | logger.debug(f"Processing file path source: {image_source}") 51 | try: 52 | return Image.open(image_source) 53 | except FileNotFoundError: 54 | raise LensImageError(f"File not found at path: {image_source}") 55 | except Exception as e: 56 | raise LensImageError( 57 | f"Error opening file path '{image_source}': {e}" 58 | ) from e 59 | 60 | if isinstance(image_source, np.ndarray): 61 | logger.debug("Processing NumPy array source.") 62 | try: 63 | return Image.fromarray(image_source) 64 | except Exception as e: 65 | raise LensImageError(f"Error converting NumPy array to image: {e}") from e 66 | 67 | if isinstance(image_source, bytes): 68 | logger.debug("Processing bytes source.") 69 | try: 70 | return Image.open(io.BytesIO(image_source)) 71 | except Exception as e: 72 | raise LensImageError(f"Error opening image from bytes: {e}") from e 73 | 74 | raise LensImageError(f"Unsupported image source type: {type(image_source)}") 75 | 76 | 77 | def _resize_and_serialize_pil_image(pil_image: Image.Image) -> Tuple[bytes, int, int]: 78 | """Resizes (if necessary) and serializes a PIL.Image to PNG bytes.""" 79 | if pil_image.mode != "RGBA": 80 | pil_image = pil_image.convert("RGBA") 81 | 82 | if ( 83 | pil_image.width > DEFAULT_IMAGE_MAX_DIMENSION 84 | or pil_image.height > DEFAULT_IMAGE_MAX_DIMENSION 85 | ): 86 | pil_image.thumbnail( 87 | (DEFAULT_IMAGE_MAX_DIMENSION, DEFAULT_IMAGE_MAX_DIMENSION), 88 | Image.Resampling.LANCZOS, 89 | ) 90 | 91 | img_byte_arr = io.BytesIO() 92 | pil_image.save(img_byte_arr, format="PNG") 93 | 94 | return img_byte_arr.getvalue(), pil_image.width, pil_image.height 95 | 96 | 97 | async def prepare_image_for_api( 98 | image_source: Any, 99 | ) -> Tuple[bytes, int, int, Image.Image]: 100 | """ 101 | Main preparation function. Takes any source, processes it, and returns API-ready data and the original image. 
102 | """ 103 | try: 104 | pil_image = await _get_pil_from_source(image_source) 105 | original_pil_image = pil_image.copy() 106 | img_bytes, width, height = _resize_and_serialize_pil_image(pil_image) 107 | return img_bytes, width, height, original_pil_image 108 | except LensImageError as e: 109 | raise e 110 | except Exception as e: 111 | raise LensImageError( 112 | f"An unexpected error occurred during image preparation: {e}" 113 | ) from e 114 | 115 | 116 | def get_word_geometry_data(box: "CenterRotatedBox") -> Optional[Dict[str, Any]]: 117 | """Extracts detailed, user-friendly geometry data from a CenterRotatedBox object.""" 118 | if not (hasattr(box, "center_x") and hasattr(box, "center_y")): 119 | return None 120 | 121 | angle_rad = getattr(box, "rotation_z", 0.0) 122 | angle_deg = math.degrees(angle_rad) 123 | 124 | coord_type_enum = getattr(box, "coordinate_type", 0) 125 | coord_type_str = "NORMALIZED" if coord_type_enum == 1 else "IMAGE" 126 | 127 | return { 128 | "center_x": box.center_x, 129 | "center_y": box.center_y, 130 | "width": getattr(box, "width", 0.0), 131 | "height": getattr(box, "height", 0.0), 132 | "angle_deg": angle_deg, 133 | "coordinate_type": coord_type_str, 134 | } 135 | 136 | 137 | def draw_overlay_on_image( 138 | original_image: Image.Image, 139 | ocr_boxes_norm: list[Tuple[float, float, float, float]], 140 | translated_text: Optional[str], 141 | font: FontType, 142 | fill_color: str = "white", 143 | text_color: str = "black", 144 | ) -> Image.Image: 145 | """Draws an overlay on the image: fills OCR areas and writes translated text.""" 146 | img_draw = original_image.copy() 147 | if img_draw.mode != "RGBA": 148 | img_draw = img_draw.convert("RGBA") 149 | draw = ImageDraw.Draw(img_draw) 150 | img_width, img_height = img_draw.size 151 | 152 | if not ocr_boxes_norm: 153 | return img_draw 154 | 155 | for norm_x1, norm_y1, norm_x2, norm_y2 in ocr_boxes_norm: 156 | draw.rectangle( 157 | ( 158 | int(norm_x1 * img_width), 159 | int(norm_y1 * img_height), 160 | int(norm_x2 * img_width), 161 | int(norm_y2 * img_height), 162 | ), 163 | fill=fill_color, 164 | ) 165 | 166 | if not translated_text: 167 | return img_draw 168 | 169 | overall_ocr_min_x = min(b[0] for b in ocr_boxes_norm) 170 | overall_ocr_min_y = min(b[1] for b in ocr_boxes_norm) 171 | overall_ocr_max_x = max(b[2] for b in ocr_boxes_norm) 172 | overall_ocr_max_y = max(b[3] for b in ocr_boxes_norm) 173 | 174 | px_overall_x1 = int(overall_ocr_min_x * img_width) 175 | px_overall_y1 = int(overall_ocr_min_y * img_height) 176 | px_overall_x2 = int(overall_ocr_max_x * img_width) 177 | px_overall_y2 = int(overall_ocr_max_y * img_height) 178 | 179 | overlay_width_px = px_overall_x2 - px_overall_x1 180 | if overlay_width_px <= 0: 181 | return img_draw 182 | 183 | padding = 4 184 | available_width_for_text = overlay_width_px - 2 * padding 185 | if available_width_for_text <= 0: 186 | return img_draw 187 | 188 | lines_to_draw = [] 189 | current_line = "" 190 | for word in translated_text.split(): 191 | test_line = f"{current_line} {word}".strip() 192 | try: 193 | line_width = draw.textlength(test_line, font=font) 194 | except AttributeError: 195 | bbox = draw.textbbox((0, 0), test_line, font=font) 196 | line_width = bbox[2] - bbox[0] 197 | 198 | if line_width <= available_width_for_text: 199 | current_line = test_line 200 | else: 201 | if current_line: 202 | lines_to_draw.append(current_line) 203 | current_line = word 204 | if current_line: 205 | lines_to_draw.append(current_line) 206 | 207 | current_y = px_overall_y1 + 
padding 208 | line_spacing = 2 209 | for line_str in lines_to_draw: 210 | try: 211 | bbox = draw.textbbox((0, 0), line_str, font=font) 212 | line_height = bbox[3] - bbox[1] 213 | line_width = bbox[2] - bbox[0] 214 | 215 | if current_y + line_height > px_overall_y2 - padding: 216 | break 217 | 218 | pos_x = px_overall_x1 + (overlay_width_px - line_width) / 2 219 | draw.text( 220 | (pos_x, current_y), 221 | line_str, 222 | fill=text_color, 223 | font=font, 224 | ) 225 | current_y += line_height + line_spacing 226 | except Exception as e: 227 | logger.warning(f"Could not draw line '{line_str}': {e}") 228 | if hasattr(font, "size"): 229 | line_height = font.size # type: ignore [attr-defined] 230 | else: 231 | line_height = 12 232 | current_y += line_height + line_spacing 233 | continue 234 | 235 | return img_draw 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chrome Lens API for Python 2 | 3 | **English** | [Русский](/README_RU.md) 4 | 5 | [![PyPI version](https://badge.fury.io/py/chrome-lens-py.svg)](https://badge.fury.io/py/chrome-lens-py) 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | [![Python versions](https://img.shields.io/pypi/pyversions/chrome-lens-py.svg)](https://pypi.org/project/chrome-lens-py) 8 | [![Downloads](https://static.pepy.tech/badge/chrome-lens-py)](https://pepy.tech/project/chrome-lens-py) 9 | 10 | > [!IMPORTANT] 11 | > **Major Rewrite (Version 3.1.0+)** 12 | > This library has been completely rewritten from the ground up. It now uses a modern asynchronous architecture (`async`/`await`) and communicates directly with Google's Protobuf endpoint for significantly improved reliability and performance. 13 | > 14 | > **Please update your projects accordingly. All API calls are now `async`.** 15 | > 16 | 17 | > [!WARNING] 18 | > Since the library has been completely rewritten, I may have missed or under-documented something. If you notice an error, please let me know in Issues. 19 | 20 | This project provides a powerful, asynchronous Python library and command-line tool for interacting with Google Lens. It allows you to perform advanced Optical Character Recognition (OCR), get segmented text blocks (e.g., for comics), translate text, and get precise word coordinates. 21 | 22 | ## ✨ Key Features 23 | 24 | - **Modern Backend**: Utilizes Google's official Protobuf endpoint (`v1/crupload`) for robust and accurate results. 25 | - **Asynchronous & Safe**: Built with `asyncio` and `httpx`. Includes a built-in semaphore to prevent API abuse and IP bans from excessive concurrent requests. 26 | - **Powerful OCR & Segmentation**: 27 | - Extract text from images as a single string. 28 | - Get text segmented into logical blocks (paragraphs, dialog bubbles) with their own coordinates. 29 | - Get individual text lines with their own precise geometry. 30 | - **Built-in Translation**: Instantly translate recognized text into any supported language. 31 | - **Versatile Image Sources**: Process images from a **file path**, **URL**, **bytes**, **PIL Image** object, or **NumPy array**. 32 | - **Text Overlay**: Automatically generate and save images with the translated text rendered over them (works poorly for now, alas; no time to do better). 33 | - **Feature-Rich CLI**: A simple yet powerful command-line interface (`lens_scan`) for quick use.
34 | - **Proxy Support**: Full support for HTTP, HTTPS, and SOCKS proxies. 35 | - **Clipboard Integration**: Instantly copy OCR or translation results to your clipboard with the `--sharex` flag. 36 | - **Flexible Configuration**: Manage settings via a `config.json` file, CLI arguments, or environment variables. 37 | 38 | ## 🚀 Installation 39 | 40 | You can install the package using `pip`: 41 | 42 | ```bash 43 | pip install chrome-lens-py 44 | ``` 45 | 46 | To enable clipboard functionality (the `--sharex` flag), install the library with the `[clipboard]` extra: 47 | 48 | ```bash 49 | pip install "chrome-lens-py[clipboard]" 50 | ``` 51 | 52 | Or, install the latest version directly from GitHub: 53 | ```bash 54 | pip install git+https://github.com/bropines/chrome-lens-py.git 55 | ``` 56 | 57 | ## 🚀 Usage 58 | 59 |
60 | ### 🛠️ CLI Usage (`lens_scan`) 61 | 62 | The command-line tool provides quick access to the library's features directly from your terminal. 63 | 64 | ```bash 65 | lens_scan <image_source> [ocr_lang] [options] 66 | ``` 67 | 68 | - **`<image_source>`**: Path to a local image file or an image URL. 69 | - **`[ocr_lang]`** (optional): BCP 47 language code for OCR (e.g., 'en', 'ja'). If omitted, the API will attempt to auto-detect the language. 70 | 71 | #### **Options** 72 | 73 | | Flag | Alias | Description | 74 | | :--- | :--- | :--- | 75 | | `--translate <lang>` | `-t` | **Translate** the OCR text to the target language code (e.g., `en`, `ru`). | 76 | | `--translate-from <lang>` | | Specify the source language for translation (otherwise auto-detected). | 77 | | `--translate-out <path>` | `-to` | **Save** the image with the translated text overlaid to the specified file path. | 78 | | `--output-blocks` | `-b` | **Output OCR text as segmented blocks** (useful for comics). Incompatible with `--get-coords` and `--output-lines`. | 79 | | `--output-lines` | `-ol` | **Output OCR text as individual lines** with their geometry. Incompatible with `--output-blocks` and `--get-coords`. | 80 | | `--get-coords` | | Output recognized words and their coordinates in JSON format. Incompatible with `--output-blocks` and `--output-lines`. | 81 | | `--sharex` | `-sx` | **Copy** the result (translation or OCR) to the clipboard. | 82 | | `--ocr-single-line` | | Join all recognized OCR text into a single line, removing line breaks. | 83 | | `--config-file <path>` | | Path to a custom JSON configuration file. | 84 | | `--update-config` | | Update the default config file with settings from the current command. | 85 | | `--font <path>` | | Path to a `.ttf` font file for the text overlay. | 86 | | `--font-size <size>` | | Font size for the text overlay (default: 20). | 87 | | `--proxy <url>` | | Proxy server URL (e.g., `socks5://127.0.0.1:9050`). | 88 | | `--logging-level <level>` | `-l` | Set logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`). | 89 | | `--help` | `-h` | Show this help message and exit. | 90 | 91 | #### **Examples** 92 | 93 | **1. Basic OCR and Translation** 94 | 95 | Auto-detects the source language on the image and translates it to English. This is the most common use case. 96 | ```bash 97 | lens_scan "path/to/your/image.png" -t en 98 | ``` 99 | 100 | --- 101 | 102 | **2. Get Segmented Text Blocks (for Comics/Manga)** 103 | 104 | Ideal for images with multiple, separate text boxes. This command outputs each recognized text block individually, making it perfect for translating comics or complex documents. 105 | ```bash 106 | lens_scan "path/to/manga.jpg" ja -b 107 | ``` 108 | - `-b` is the alias for `--output-blocks`. 109 | 110 | --- 111 | 112 | **3. Get Individual Text Lines** 113 | 114 | Outputs each recognized line of text along with its geometry. 115 | ```bash 116 | lens_scan "path/to/document.png" --output-lines 117 | ``` 118 | - `-ol` is the alias for `--output-lines`. 119 | 120 | --- 121 | 122 | **4. Get Coordinates of All Individual Words** 123 | 124 | Outputs a detailed JSON array containing every single recognized word and its precise geometric data (center, size, angle). Useful for programmatic analysis or custom overlays. 125 | ```bash 126 | lens_scan "path/to/diagram.png" --get-coords 127 | ``` 128 | 129 | --- 130 | 131 | **5. Translate, Save Overlay, and Copy to Clipboard** 132 | 133 | A power-user workflow. This command will: 134 | 1. OCR a Japanese image. 135 | 2. Translate it to Russian. 136 | 3.
Save a new image named `translated_manga.png` with the Russian text rendered on it. 137 | 4. Copy the final translation to your clipboard. 138 | ```bash 139 | lens_scan "path/to/manga.jpg" ja -t ru -to "translated_manga.png" -sx 140 | ``` 141 | 142 | --- 143 | 144 | **6. Process an Image from a URL as a Single Line** 145 | 146 | Fetches an image directly from a URL and joins all recognized text into one continuous line, removing any line breaks. 147 | ```bash 148 | lens_scan "https://i.imgur.com/VPd1y6b.png" en --ocr-single-line 149 | ``` 150 | 151 | --- 152 | 153 | **7. Use a SOCKS5 Proxy** 154 | 155 | All requests to the Google API will be routed through the specified proxy server, which is useful for privacy or bypassing region restrictions. 156 | ```bash 157 | lens_scan "image.png" --proxy "socks5://127.0.0.1:9050" 158 | ``` 159 | 160 |
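---

**8. Store Defaults in a Configuration File**

Settings you pass on every call can instead live in a JSON config file. A minimal sketch, assuming the default lookup location used by the library's config loader (`$XDG_CONFIG_HOME` or `~/.config`, plus `chrome-lens-py/config.json`); the keys shown are the ones the config builder reads, and all values here are illustrative:

```json
{
    "client_region": "US",
    "client_time_zone": "America/New_York",
    "proxy": "socks5://127.0.0.1:9050",
    "timeout": 60,
    "font_size": 20,
    "logging_level": "WARNING",
    "ocr_preserve_line_breaks": true
}
```

CLI arguments take priority over the config file, which in turn takes priority over built-in defaults; `--update-config` writes the current CLI settings back into this file.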
161 | 162 |
163 | 👨‍💻 Programmatic API Usage (`LensAPI`) 164 | 165 | > [!IMPORTANT] 166 | > The `LensAPI` is fully **asynchronous**. All data retrieval methods must be called with `await` from within an `async` function. 167 | 168 | #### **Basic Example (Full Text)** 169 | 170 | ```python 171 | import asyncio 172 | from chrome_lens_py import LensAPI 173 | 174 | async def main(): 175 | # Initialize the API. You can pass a proxy, region, etc. here. 176 | # By default, an API key is not required. 177 | api = LensAPI() 178 | 179 | image_source = "path/to/your/image.png" # Or a URL, PIL Image, NumPy array 180 | 181 | try: 182 | # Process the image and get a single string of text 183 | result = await api.process_image( 184 | image_path=image_source, 185 | ocr_language="ja", 186 | target_translation_language="en" 187 | ) 188 | 189 | print("--- OCR Text ---") 190 | print(result.get("ocr_text")) 191 | 192 | print("\n--- Translated Text ---") 193 | print(result.get("translated_text")) 194 | 195 | except Exception as e: 196 | print(f"An error occurred: {e}") 197 | 198 | if __name__ == "__main__": 199 | asyncio.run(main()) 200 | ``` 201 | 202 | #### **Working with Different Image Sources** 203 | 204 | The `process_image` method seamlessly handles various input types. 205 | 206 | ```python 207 | from PIL import Image 208 | import numpy as np 209 | 210 | # ... inside an async function ... 211 | 212 | # From a URL 213 | result_url = await api.process_image("https://i.imgur.com/VPd1y6b.png") 214 | 215 | # From a PIL Image object 216 | with Image.open("path/to/image.png") as img: 217 | result_pil = await api.process_image(img) 218 | 219 | # From a NumPy array (e.g., loaded via OpenCV) 220 | with Image.open("path/to/image.png") as img: 221 | numpy_array = np.array(img) 222 | result_numpy = await api.process_image(numpy_array) 223 | ``` 224 | 225 | #### **Getting Segmented Text Blocks** 226 | 227 | To get text segmented into logical blocks (like dialog bubbles in a comic), use the `output_format='blocks'` parameter. 228 | 229 | ```python 230 | import asyncio 231 | from chrome_lens_py import LensAPI 232 | 233 | async def process_comics(): 234 | api = LensAPI() 235 | image_source = "path/to/manga.jpg" 236 | 237 | result = await api.process_image( 238 | image_path=image_source, 239 | output_format='blocks' # Get segmented blocks instead of a single string 240 | ) 241 | 242 | # The result now contains a 'text_blocks' key 243 | text_blocks = result.get("text_blocks", []) 244 | print(f"Found {len(text_blocks)} text blocks.") 245 | 246 | for i, block in enumerate(text_blocks): 247 | print(f"\n--- Block #{i+1} ---") 248 | print(block['text']) 249 | # block also contains 'lines' and 'geometry' keys 250 | 251 | asyncio.run(process_comics()) 252 | ``` 253 | 254 | #### **Getting Individual Lines and their Geometry** 255 | 256 | To get each recognized line of text as a separate item, use the `output_format='lines'` parameter. 
257 | 258 | ```python 259 | import asyncio 260 | from chrome_lens_py import LensAPI 261 | 262 | async def process_document_lines(): 263 | api = LensAPI() 264 | image_source = "path/to/document.png" 265 | 266 | result = await api.process_image( 267 | image_path=image_source, 268 | output_format='lines' # Get individual lines with their geometry 269 | ) 270 | 271 | # The result now contains a 'line_blocks' key 272 | line_blocks = result.get("line_blocks", []) 273 | print(f"Found {len(line_blocks)} lines.") 274 | 275 | for i, line in enumerate(line_blocks): 276 | print(f"\n--- Line #{i+1} ---") 277 | print(f"Text: {line['text']}") 278 | print(f"Geometry: {line['geometry']}") 279 | 280 | asyncio.run(process_document_lines()) 281 | ``` 282 | 283 | #### **Getting Fully Detailed Text Structures** 284 | 285 | To get a complete, nested structure of paragraphs, lines, and words with geometry at each level, use `output_format='detailed'`. 286 | 287 | ```python 288 | import asyncio 289 | from chrome_lens_py import LensAPI 290 | 291 | async def process_with_details(): 292 | api = LensAPI() 293 | image_source = "path/to/document.png" 294 | 295 | result = await api.process_image( 296 | image_path=image_source, 297 | output_format='detailed' # Get the fully nested structure 298 | ) 299 | 300 | # The result now contains a 'detailed_blocks' key 301 | detailed_blocks = result.get("detailed_blocks", []) 302 | print(f"Found {len(detailed_blocks)} detailed blocks.") 303 | 304 | for i, block in enumerate(detailed_blocks): 305 | print(f"\n--- Block #{i+1} ---") 306 | print(f" Geometry: {block['geometry']}") 307 | for j, line in enumerate(block['lines']): 308 | print(f" --- Line #{j+1}: '{line['text']}' ---") 309 | for k, word in enumerate(line['words']): 310 | print(f" - Word: '{word['text']}', Geometry: {word['geometry']}") 311 | 312 | asyncio.run(process_with_details()) 313 | ``` 314 | 315 | 316 | #### **`LensAPI` Constructor** 317 | 318 | ```python 319 | api = LensAPI( 320 | api_key: str = "YOUR_API_KEY_OR_DEFAULT", 321 | client_region: Optional[str] = None, 322 | client_time_zone: Optional[str] = None, 323 | proxy: Optional[str] = None, 324 | timeout: int = 60, 325 | font_path: Optional[str] = None, 326 | font_size: Optional[int] = None, 327 | max_concurrent: int = 5 328 | ) 329 | ``` 330 | 331 | #### **`process_image` Method** 332 | 333 | ```python 334 | result: dict = await api.process_image( 335 | image_path: Any, 336 | ocr_language: Optional[str] = None, 337 | target_translation_language: Optional[str] = None, 338 | source_translation_language: Optional[str] = None, 339 | output_overlay_path: Optional[str] = None, 340 | ocr_preserve_line_breaks: bool = True, 341 | output_format: Literal['full_text', 'blocks', 'lines', 'detailed'] = 'full_text' 342 | ) 343 | ``` 344 | - **`output_format`**: Controls the structure of the OCR output. `'full_text'` (default) returns a single string in `ocr_text`. `'blocks'` returns a list in `text_blocks`. `'lines'` returns a list in `line_blocks`. `'detailed'` returns a fully nested structure in `detailed_blocks`. 345 | - **`ocr_preserve_line_breaks`**: If `False` and `output_format` is `'full_text'`, joins all OCR text into a single line. 346 | 347 | **The returned `result` dictionary contains:** 348 | - `ocr_text` (Optional[str]): The full recognized text (if `output_format='full_text'`). 349 | - `text_blocks` (Optional[List[dict]]): A list of segmented text blocks (if `output_format='blocks'`). Each block is a dict with `text`, `lines`, and `geometry`. 
350 | - `line_blocks` (Optional[List[dict]]): A list of individual text lines (if `output_format='lines'`). Each item is a dict with `text` and `geometry`.
351 | - `translated_text` (Optional[str]): The translated text, if requested.
352 | - `word_data` (List[dict]): A list of dictionaries, one per recognized word, each with its geometry.
353 | - `detailed_blocks` (Optional[List[dict]]): A list of fully structured text blocks (if `output_format='detailed'`). Each block contains lines, which in turn contain words, with geometry at every level.
354 | - `raw_response_objects`: The "raw" Protobuf response object for further analysis.
355 | 
356 | 
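#### **Converting Word Geometry to Pixels**

The geometry values in `word_data` are normalized to the image dimensions; the library's own overlay rendering computes box edges the same way, as `center ± size / 2`. Below is a small sketch for turning one entry into a pixel-space bounding box; the helper name and the example image size are illustrative, not part of the API:

```python
# Sketch: convert a word_data entry's normalized geometry to pixel coordinates.
# Assumes center_x/center_y/width/height are fractions of the image size (0.0-1.0).
def word_box_pixels(word: dict, img_width: int, img_height: int):
    geom = word["geometry"]
    x1 = (geom["center_x"] - geom["width"] / 2) * img_width
    y1 = (geom["center_y"] - geom["height"] / 2) * img_height
    x2 = (geom["center_x"] + geom["width"] / 2) * img_width
    y2 = (geom["center_y"] + geom["height"] / 2) * img_height
    # Rotated words would additionally need the angle field taken into account.
    return x1, y1, x2, y2

# Usage with a process_image result (1280x720 is just an example size):
# for word in result["word_data"]:
#     if word["geometry"]:
#         print(word["word"], word_box_pixels(word, 1280, 720))
```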
357 | 358 |
359 | ⚙️ Configuration
360 | 
361 | Settings are loaded with the following priority: **CLI Arguments > `config.json` File > Library Defaults**.
362 | 
363 | #### **`config.json`**
364 | 
365 | A `config.json` file can be placed in your system's default config directory to set persistent options.
366 | - **Linux**: `~/.config/chrome-lens-py/config.json`
367 | - **macOS**: `~/Library/Application Support/chrome-lens-py/config.json`
368 | - **Windows**: `C:\Users\<User>\.config\chrome-lens-py\config.json`
369 | 
370 | ##### **Example `config.json`**
371 | ```json
372 | {
373 |   "api_key": "OPTIONAL! If you don't know what this is, I don't recommend setting it here.",
374 |   "proxy": "socks5://127.0.0.1:9050",
375 |   "client_region": "DE",
376 |   "client_time_zone": "Europe/Berlin",
377 |   "timeout": 90,
378 |   "font_path": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
379 |   "ocr_preserve_line_breaks": true
380 | }
381 | ```
382 | 
383 | 
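##### **Creating `config.json` from a Script**

If you prefer to bootstrap the config file programmatically, here is a minimal sketch (shown for the Linux path; substitute the macOS or Windows directory from the list above):

```python
import json
from pathlib import Path

# Sketch: write persistent settings to the Linux config location listed above.
config_dir = Path.home() / ".config" / "chrome-lens-py"
config_dir.mkdir(parents=True, exist_ok=True)

settings = {
    "client_region": "DE",
    "client_time_zone": "Europe/Berlin",
    "timeout": 90,
}
(config_dir / "config.json").write_text(
    json.dumps(settings, indent=2), encoding="utf-8"
)
```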
384 | 
385 | ## ShareX Integration
386 | Check [sharex.md](docs/sharex.md) for more information on how to use this library with ShareX.
387 | 
388 | ## ❤️ Support & Acknowledgments
389 | 
390 | - **OWOCR**: Greatly inspired by and based on [OWOCR](https://github.com/AuroraWright/owocr). Thank you to them for their research into the Protobuf protocol and their OCR implementation.
391 | - **Chrome Lens OCR**: For the original implementation and ideas that formed the basis of this library. ShareX support was also first tested and contributed by me to [chrome-lens-ocr](https://github.com/dimdenGD/chrome-lens-ocr).
392 | - **AI Collaboration**: A significant portion of the v3.0 code, including the architectural refactor, asynchronous implementation, and Protobuf integration, was developed in collaboration with an advanced AI assistant.
393 | - **GOOGLE**: For the convenient and high-quality Lens technology.
394 | - **Support the Author**: If you find this library useful, you can support the author on **[Boosty](https://boosty.to/pinus)**.
395 | 
396 | ## Star History
397 | 
398 | [![Star History Chart](https://api.star-history.com/svg?repos=bropines/chrome-lens-py&type=Date)](https://www.star-history.com/#bropines/chrome-lens-py&Date)
399 | 
400 | ### Disclaimer
401 | 
402 | This project is intended for educational and experimental purposes only. Use of Google's services must comply with their Terms of Service. The author is not responsible for any misuse of this software.
--------------------------------------------------------------------------------
/README_RU.md:
--------------------------------------------------------------------------------
1 | # Chrome Lens API для Python
2 | 
3 | [English](/README.md) | **Русский**
4 | 
5 | [![PyPI version](https://badge.fury.io/py/chrome-lens-py.svg)](https://badge.fury.io/py/chrome-lens-py)
6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7 | [![Python versions](https://img.shields.io/pypi/pyversions/chrome-lens-py.svg)](https://pypi.org/project/chrome-lens-py)
8 | [![Downloads](https://static.pepy.tech/badge/chrome-lens-py)](https://pepy.tech/project/chrome-lens-py)
9 | 
10 | > [!IMPORTANT]
11 | > **Масштабное обновление (Версия 3.1.0+)**
12 | > Библиотека была полностью переписана с нуля. Теперь она использует современную асинхронную архитектуру (`async`/`await`) и взаимодействует напрямую с Protobuf эндпоинтом Google для значительно улучшенной надежности и производительности.
13 | > 
14 | > **Пожалуйста, обновите ваши проекты. Все вызовы API теперь являются `async`.**
15 | 
16 | > [!Warning]
17 | > Также обратите внимание, что библиотека была полностью переписана, и я мог что-то пропустить или не указать. Если вы заметили ошибку, сообщите мне в разделе "Issues"
18 | 
19 | Этот проект предоставляет мощную, асинхронную Python-библиотеку и утилиту командной строки для взаимодействия с Google Lens. Она позволяет выполнять продвинутое распознавание текста (OCR), получать сегментированные текстовые блоки (например, для комиксов), переводить текст и получать точные координаты слов.
20 | 
21 | ## ✨ Ключевые возможности
22 | 
23 | - **Современный бэкенд**: Использует официальный Protobuf-эндпоинт (`v1/crupload`) Google для получения надежных и точных результатов.
24 | - **Асинхронность и безопасность**: Построена на `asyncio` и `httpx`. Включает встроенный семафор для предотвращения злоупотреблений API и банов IP-адресов из-за чрезмерного количества одновременных запросов.
25 | - **Мощный OCR и сегментация**:
26 |   - Извлекайте текст с изображений в виде единой строки.
27 |   - Получайте текст, разделенный на логические блоки (абзацы, диалоговые окна) с их собственными координатами.
28 |   - Получайте отдельные строки текста с их собственной точной геометрией.
29 | - **Встроенный перевод**: Мгновенно переводите распознанный текст на любой поддерживаемый язык.
30 | - **Разные источники изображений**: Обрабатывайте изображения из **файла**, по **URL**, из **байтов**, объекта **PIL Image** или массива **NumPy**.
31 | - **Наложение текста**: Автоматически генерируйте и сохраняйте изображения с наложенным на них переводом (работает плохо, увы, нет времени сделать лучше).
32 | - **Функциональный CLI**: Простой, но мощный интерфейс командной строки (`lens_scan`) для быстрого использования.
33 | - **Поддержка прокси**: Полная поддержка HTTP, HTTPS и SOCKS прокси.
34 | - **Интеграция с буфером обмена**: Мгновенно копируйте результаты OCR или перевода в буфер обмена с помощью флага `--sharex`.
35 | - **Гибкая конфигурация**: Управляйте настройками через файл `config.json`, аргументы CLI или переменные окружения.
36 | 
37 | ## 🚀 Установка
38 | 
39 | Вы можете установить пакет с помощью `pip`:
40 | 
41 | ```bash
42 | pip install chrome-lens-py
43 | ```
44 | 
45 | Чтобы включить функцию копирования в буфер обмена (флаг `--sharex`), установите библиотеку с `[clipboard]` extra:
46 | 
47 | ```bash
48 | pip install "chrome-lens-py[clipboard]"
49 | ```
50 | 
51 | Или установите последнюю версию напрямую с GitHub:
52 | ```bash
53 | pip install git+https://github.com/bropines/chrome-lens-py.git
54 | ```
55 | ## 🚀 Использование
56 | 
57 | 
58 | 
59 | 🛠️ Использование CLI (`lens_scan`)
60 | 
61 | Утилита командной строки предоставляет быстрый доступ к возможностям библиотеки прямо из вашего терминала.
62 | 
63 | ```bash
64 | lens_scan <источник_изображения> [язык_ocr] [опции]
65 | ```
66 | 
67 | - **`<источник_изображения>`**: Путь к локальному файлу или URL-адрес изображения.
68 | - **`[язык_ocr]`** (опционально): Код языка в формате BCP 47 для OCR (например, 'en', 'ja'). Если не указан, API попытается определить язык автоматически.
69 | 
70 | #### **Опции**
71 | 
72 | | Флаг | Алиас | Описание |
73 | | :--- | :--- | :--- |
74 | | `--translate <язык>` | `-t` | **Перевести** распознанный текст на целевой язык (например, `en`, `ru`). |
75 | | `--translate-from <язык>` | | Указать исходный язык для перевода (иначе определяется автоматически). |
76 | | `--translate-out <путь>` | `-to` | **Сохранить** изображение с наложенным переводом по указанному пути. |
77 | | `--output-blocks` | `-b` | **Вывести текст OCR в виде сегментированных блоков** (полезно для комиксов). Несовместимо с `--get-coords` и `--output-lines`. |
78 | | `--output-lines` | `-ol` | **Вывести текст OCR в виде отдельных строк** с их геометрией. Несовместимо с `--output-blocks` и `--get-coords`. |
79 | | `--get-coords` | | Вывести распознанные слова и их координаты в формате JSON. Несовместимо с `--output-blocks` и `--output-lines`. |
80 | | `--sharex` | `-sx` | **Скопировать** результат в буфер обмена (перевод или OCR). |
81 | | `--ocr-single-line` | | Объединить весь распознанный текст в одну строку, удалив переносы. |
82 | | `--config-file <путь>` | | Путь к кастомному файлу конфигурации в формате JSON. |
83 | | `--update-config` | | Обновить файл конфигурации по умолчанию настройками из текущей команды. |
84 | | `--font <путь>` | | Путь к файлу шрифта `.ttf` для наложения текста. |
85 | | `--font-size <размер>` | | Размер шрифта для наложения (по умолчанию: 20). |
86 | | `--proxy <url>` | | URL прокси-сервера (например, `socks5://127.0.0.1:9050`). |
87 | | `--logging-level <ур>` | `-l` | Установить уровень логирования (`DEBUG`, `INFO`, `WARNING`, `ERROR`). |
88 | | `--help` | `-h` | Показать это справочное сообщение. |
89 | 
90 | #### **Примеры**
91 | 
92 | **1. Базовое распознавание (OCR) и перевод**
93 | 
94 | Автоматически определяет язык на изображении и переводит его на английский. Это самый распространенный сценарий использования.
95 | ```bash
96 | lens_scan "путь/к/вашему/изображению.png" -t en
97 | ```
98 | 
99 | ---
100 | 
101 | **2. Получение сегментированных текстовых блоков (для комиксов/манги)**
102 | 
103 | Идеально подходит для изображений с несколькими отдельными текстовыми блоками. Эта команда выводит каждый распознанный блок текста по отдельности, что отлично подходит для перевода комиксов или сложных документов.
104 | ```bash
105 | lens_scan "путь/к/манге.jpg" ja -b
106 | ```
107 | - `-b` — это короткий псевдоним для `--output-blocks`.
108 | 
109 | ---
110 | 
111 | **3. Получение отдельных строк текста**
112 | 
113 | Выводит каждую распознанную строку текста вместе с ее геометрией.
114 | ```bash
115 | lens_scan "путь/к/документу.png" --output-lines
116 | ```
117 | - `-ol` — это короткий псевдоним для `--output-lines`.
118 | 
119 | ---
120 | 
121 | **4. Получение координат всех отдельных слов**
122 | 
123 | Выводит подробный массив JSON, содержащий каждое распознанное слово и его точные геометрические данные (центр, размер, угол). Полезно для программного анализа или создания собственных наложений.
124 | ```bash 125 | lens_scan "путь/к/схеме.png" --get-coords 126 | ``` 127 | 128 | --- 129 | 130 | **5. Перевести, сохранить с наложением и скопировать в буфер обмена** 131 | 132 | Пример для продвинутых пользователей. Эта команда выполнит несколько действий: 133 | 1. Распознает текст на японском изображении. 134 | 2. Переведет его на русский. 135 | 3. Сохранит новое изображение `перевод_манги.png` с наложенным на него русским текстом. 136 | 4. Скопирует итоговый перевод в буфер обмена. 137 | ```bash 138 | lens_scan "путь/к/манге.jpg" ja -t ru -to "перевод_манги.png" -sx 139 | ``` 140 | 141 | --- 142 | 143 | **6. Обработать изображение по URL и получить текст в одну строку** 144 | 145 | Загружает изображение напрямую по URL-адресу и объединяет весь распознанный текст в одну непрерывную строку, удаляя все переносы. 146 | ```bash 147 | lens_scan "https://i.imgur.com/VPd1y6b.png" en --ocr-single-line 148 | ``` 149 | 150 | --- 151 | 152 | **7. Использовать SOCKS5 прокси** 153 | 154 | Все запросы к API Google будут направляться через указанный прокси-сервер, что полезно для обеспечения конфиденциальности или обхода региональных ограничений. 155 | ```bash 156 | lens_scan "image.png" --proxy "socks5://127.0.0.1:9050" 157 | ``` 158 | 159 | 160 |
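---

**8. Пакетная обработка через скрипт**

CLI обрабатывает один источник за запуск, поэтому папку с изображениями проще всего обойти небольшим скриптом-оберткой. Набросок ниже приведен для иллюстрации и не является частью библиотеки: предполагается, что `lens_scan` доступен в `PATH`, а PNG-файлы лежат в каталоге `scans/`.

```python
import subprocess
from pathlib import Path

# Минимальная обертка над CLI lens_scan (пути условные).
# Каждое изображение распознается и переводится на английский;
# ошибка на одном файле не останавливает остальные.
for image in sorted(Path("scans").glob("*.png")):
    proc = subprocess.run(
        ["lens_scan", str(image), "-t", "en"],
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        print(f"=== {image.name} ===\n{proc.stdout}")
    else:
        print(f"Ошибка на {image.name}: {proc.stderr.strip()}")
```

Учтите, что параллельный запуск множества копий обойдет встроенный лимит одновременных запросов библиотеки, поэтому последовательный цикл безопаснее.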
161 | 162 |
163 | 👨‍💻 Программное использование (API) 164 | 165 | > [!IMPORTANT] 166 | > `LensAPI` полностью **асинхронный**. Все методы для получения данных должны вызываться с помощью `await` из `async` функции. 167 | 168 | #### **Базовый пример (Полный текст)** 169 | 170 | ```python 171 | import asyncio 172 | from chrome_lens_py import LensAPI 173 | 174 | async def main(): 175 | # Инициализируем API. Здесь можно передать прокси, регион и т.д. 176 | # По умолчанию API ключ не требуется. 177 | api = LensAPI() 178 | 179 | image_source = "путь/к/вашему/изображению.png" # Или URL, PIL Image, NumPy array 180 | 181 | try: 182 | # Обрабатываем изображение и получаем текст единой строкой 183 | result = await api.process_image( 184 | image_path=image_source, 185 | ocr_language="ja", 186 | target_translation_language="en" 187 | ) 188 | 189 | print("--- Распознанный текст (OCR) ---") 190 | print(result.get("ocr_text")) 191 | 192 | print("\n--- Переведенный текст ---") 193 | print(result.get("translated_text")) 194 | 195 | except Exception as e: 196 | print(f"Произошла ошибка: {e}") 197 | 198 | if __name__ == "__main__": 199 | asyncio.run(main()) 200 | ``` 201 | 202 | #### **Работа с разными источниками изображений** 203 | 204 | Метод `process_image` легко обрабатывает различные типы входных данных. 205 | 206 | ```python 207 | from PIL import Image 208 | import numpy as np 209 | 210 | # ... внутри async функции ... 211 | 212 | # Из URL 213 | result_url = await api.process_image("https://i.imgur.com/VPd1y6b.png") 214 | 215 | # Из объекта PIL Image 216 | with Image.open("путь/к/изображению.png") as img: 217 | result_pil = await api.process_image(img) 218 | 219 | # Из массива NumPy (например, загруженного через OpenCV) 220 | with Image.open("путь/к/изображению.png") as img: 221 | numpy_array = np.array(img) 222 | result_numpy = await api.process_image(numpy_array) 223 | ``` 224 | 225 | #### **Получение сегментированных текстовых блоков** 226 | 227 | Чтобы получить текст, разделенный на логические блоки (например, диалоговые окна в комиксе), используйте параметр `output_format='blocks'`. 228 | 229 | ```python 230 | import asyncio 231 | from chrome_lens_py import LensAPI 232 | 233 | async def process_comics(): 234 | api = LensAPI() 235 | image_source = "путь/к/манге.jpg" 236 | 237 | result = await api.process_image( 238 | image_path=image_source, 239 | output_format='blocks' # Получить сегментированные блоки вместо одной строки 240 | ) 241 | 242 | # Результат теперь содержит ключ 'text_blocks' 243 | text_blocks = result.get("text_blocks", []) 244 | print(f"Найдено {len(text_blocks)} текстовых блоков.") 245 | 246 | for i, block in enumerate(text_blocks): 247 | print(f"\n--- Блок #{i+1} ---") 248 | print(block['text']) 249 | # block также содержит ключи 'lines' и 'geometry' 250 | 251 | asyncio.run(process_comics()) 252 | ``` 253 | 254 | #### **Получение отдельных строк и их геометрии** 255 | 256 | Чтобы получить каждую распознанную строку текста как отдельный элемент, используйте параметр `output_format='lines'`. 
257 | 
258 | ```python
259 | import asyncio
260 | from chrome_lens_py import LensAPI
261 | 
262 | async def process_document_lines():
263 |     api = LensAPI()
264 |     image_source = "путь/к/документу.png"
265 | 
266 |     result = await api.process_image(
267 |         image_path=image_source,
268 |         output_format='lines'  # Получить отдельные строки с их геометрией
269 |     )
270 | 
271 |     # Результат теперь содержит ключ 'line_blocks'
272 |     line_blocks = result.get("line_blocks", [])
273 |     print(f"Найдено {len(line_blocks)} строк.")
274 | 
275 |     for i, line in enumerate(line_blocks):
276 |         print(f"\n--- Строка #{i+1} ---")
277 |         print(f"Текст: {line['text']}")
278 |         print(f"Геометрия: {line['geometry']}")
279 | 
280 | asyncio.run(process_document_lines())
281 | ```
282 | #### **Получение полностью детализированных структур текста**
283 | 
284 | Чтобы получить полную, вложенную структуру из абзацев, строк и слов с геометрией на каждом уровне, используйте `output_format='detailed'`.
285 | 
286 | ```python
287 | import asyncio
288 | from chrome_lens_py import LensAPI
289 | 
290 | async def process_with_details():
291 |     api = LensAPI()
292 |     image_source = "путь/к/документу.png"
293 | 
294 |     result = await api.process_image(
295 |         image_path=image_source,
296 |         output_format='detailed'  # Получить полностью вложенную структуру
297 |     )
298 | 
299 |     # Результат теперь содержит ключ 'detailed_blocks'
300 |     detailed_blocks = result.get("detailed_blocks", [])
301 |     print(f"Найдено {len(detailed_blocks)} детализированных блоков.")
302 | 
303 |     for i, block in enumerate(detailed_blocks):
304 |         print(f"\n--- Блок #{i+1} ---")
305 |         print(f"  Геометрия: {block['geometry']}")
306 |         for j, line in enumerate(block['lines']):
307 |             print(f"  --- Строка #{j+1}: '{line['text']}' ---")
308 |             for k, word in enumerate(line['words']):
309 |                 print(f"    - Слово: '{word['text']}', Геометрия: {word['geometry']}")
310 | 
311 | asyncio.run(process_with_details())
312 | ```
313 | 
314 | 
315 | #### **Конструктор `LensAPI`**
316 | 
317 | ```python
318 | api = LensAPI(
319 |     api_key: str = "ВАШ_API_КЛЮЧ_ИЛИ_КЛЮЧ_ПО_УМОЛЧАНИЮ",
320 |     client_region: Optional[str] = None,
321 |     client_time_zone: Optional[str] = None,
322 |     proxy: Optional[str] = None,
323 |     timeout: int = 60,
324 |     font_path: Optional[str] = None,
325 |     font_size: Optional[int] = None,
326 |     max_concurrent: int = 5
327 | )
328 | ```
329 | 
330 | #### **Метод `process_image`**
331 | 
332 | ```python
333 | result: dict = await api.process_image(
334 |     image_path: Any,
335 |     ocr_language: Optional[str] = None,
336 |     target_translation_language: Optional[str] = None,
337 |     source_translation_language: Optional[str] = None,
338 |     output_overlay_path: Optional[str] = None,
339 |     ocr_preserve_line_breaks: bool = True,
340 |     output_format: Literal['full_text', 'blocks', 'lines', 'detailed'] = 'full_text'
341 | )
342 | ```
343 | - **`output_format`**: Управляет структурой OCR-вывода. `'full_text'` (по умолчанию) возвращает одну строку в `ocr_text`. `'blocks'` возвращает список в `text_blocks`. `'lines'` возвращает список в `line_blocks`. `'detailed'` возвращает полностью вложенную структуру в `detailed_blocks`.
344 | - **`ocr_preserve_line_breaks`**: Если `False` и `output_format` равен `'full_text'`, объединяет весь текст OCR в одну строку.
345 | 
346 | **Возвращаемый словарь `result` содержит:**
347 | - `ocr_text` (Optional[str]): Полный распознанный текст (если `output_format='full_text'`).
348 | - `text_blocks` (Optional[List[dict]]): Список сегментированных текстовых блоков (если `output_format='blocks'`). Каждый блок — это словарь с ключами `text`, `lines` и `geometry`.
349 | - `line_blocks` (Optional[List[dict]]): Список отдельных текстовых строк (если `output_format='lines'`). Каждый элемент — это словарь с ключами `text` и `geometry`.
350 | - `translated_text` (Optional[str]): Переведенный текст, если был запрошен.
351 | - `word_data` (List[dict]): Список словарей для каждого распознанного слова с его геометрией.
352 | - `detailed_blocks` (Optional[List[dict]]): Список полностью структурированных текстовых блоков (если `output_format='detailed'`). Каждый блок содержит строки, которые, в свою очередь, содержат слова, с геометрией на каждом уровне.
353 | - `raw_response_objects`: "Сырой" Protobuf-объект ответа для дальнейшего анализа.
354 | 
355 | 
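#### **Преобразование геометрии слов в пиксели**

Значения геометрии в `word_data` нормализованы относительно размеров изображения; собственный код наложения в библиотеке считает границы рамки так же, как `центр ± размер / 2`. Ниже небольшой набросок для перевода одной записи в пиксельные координаты; имя функции и размер изображения в примере условны и не являются частью API:

```python
# Набросок: перевод нормализованной геометрии слова в пиксельные координаты.
# Предполагается, что center_x/center_y/width/height — доли размера изображения (0.0-1.0).
def word_box_pixels(word: dict, img_width: int, img_height: int):
    geom = word["geometry"]
    x1 = (geom["center_x"] - geom["width"] / 2) * img_width
    y1 = (geom["center_y"] - geom["height"] / 2) * img_height
    x2 = (geom["center_x"] + geom["width"] / 2) * img_width
    y2 = (geom["center_y"] + geom["height"] / 2) * img_height
    # Для повернутого текста дополнительно потребуется поле угла.
    return x1, y1, x2, y2

# Использование с результатом process_image (размер 1280x720 — просто пример):
# for word in result["word_data"]:
#     if word["geometry"]:
#         print(word["word"], word_box_pixels(word, 1280, 720))
```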
356 | 357 |
358 | ⚙️ Конфигурация
359 | 
360 | Настройки загружаются со следующим приоритетом: **Аргументы CLI > Файл `config.json` > Значения по умолчанию**.
361 | 
362 | #### **`config.json`**
363 | 
364 | Файл `config.json` можно разместить в директории конфигурации по умолчанию вашей ОС для установки постоянных опций.
365 | - **Linux**: `~/.config/chrome-lens-py/config.json`
366 | - **macOS**: `~/Library/Application Support/chrome-lens-py/config.json`
367 | - **Windows**: `C:\Users\<Пользователь>\.config\chrome-lens-py\config.json`
368 | 
369 | ##### **Пример `config.json`**
370 | ```json
371 | {
372 |   "api_key": "ОПЦИОНАЛЬНО! Если вы не знаете, что это, то не советую его здесь указывать",
373 |   "proxy": "socks5://127.0.0.1:9050",
374 |   "client_region": "DE",
375 |   "client_time_zone": "Europe/Berlin",
376 |   "timeout": 90,
377 |   "font_path": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
378 |   "ocr_preserve_line_breaks": true
379 | }
380 | ```
381 | 
382 | 
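##### **Создание `config.json` из скрипта**

Если удобнее создать файл конфигурации программно, вот минимальный набросок (показан путь для Linux; для macOS и Windows подставьте директорию из списка выше):

```python
import json
from pathlib import Path

# Набросок: записываем постоянные настройки по пути для Linux из списка выше.
config_dir = Path.home() / ".config" / "chrome-lens-py"
config_dir.mkdir(parents=True, exist_ok=True)

settings = {
    "client_region": "DE",
    "client_time_zone": "Europe/Berlin",
    "timeout": 90,
}
(config_dir / "config.json").write_text(
    json.dumps(settings, indent=2), encoding="utf-8"
)
```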
383 | 
384 | ## Интеграция ShareX
385 | Посмотрите [sharex.md](docs/sharex.md) для получения дополнительной информации о том, как использовать эту библиотеку с ShareX.
386 | 
387 | ## ❤️ Поддержка и благодарности
388 | 
389 | - **OWOCR**: Во многом вдохновлен и основан на [OWOCR](https://github.com/AuroraWright/owocr). Благодарю ребят за их ресерч Protobuf и реализацию OCR.
390 | - **Chrome Lens OCR**: За изначальную реализацию и идеи, которые легли в основу этой библиотеки. Поддержка ShareX изначально была протестирована и добавлена мной в [chrome-lens-ocr](https://github.com/dimdenGD/chrome-lens-ocr).
391 | - **Совместная работа с ИИ**: Значительная часть кода версии 3.0, включая рефакторинг архитектуры, асинхронную реализацию и интеграцию с Protobuf, была разработана в сотрудничестве с продвинутым ИИ-ассистентом.
392 | - **GOOGLE**: За удобную и качественную технологию Lens.
393 | - **Поддержать автора**: Если эта библиотека оказалась вам полезной, вы можете поддержать автора на **[Boosty](https://boosty.to/pinus)**.
394 | 
395 | ## Star History
396 | 
397 | [![Star History Chart](https://api.star-history.com/svg?repos=bropines/chrome-lens-py&type=Date)](https://www.star-history.com/#bropines/chrome-lens-py&Date)
398 | 
399 | ### Отказ от ответственности
400 | 
401 | Этот проект предназначен исключительно для образовательных и экспериментальных целей. Использование сервисов Google должно соответствовать их Условиям предоставления услуг. Автор проекта не несет ответственности за любое неправомерное использование этого программного обеспечения.
--------------------------------------------------------------------------------
/src/chrome_lens_py/api.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from math import pi
4 | from pathlib import Path
5 | from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
6 | 
7 | import httpx
8 | from PIL import ImageFont
9 | 
10 | from .constants import (
11 |     DEFAULT_API_KEY,
12 |     DEFAULT_CLIENT_REGION,
13 |     DEFAULT_CLIENT_TIME_ZONE,
14 |     DEFAULT_OCR_LANG,
15 | )
16 | from .core.image_processor import (
17 |     draw_overlay_on_image,
18 |     get_word_geometry_data,
19 |     prepare_image_for_api,
20 | )
21 | from .core.protobuf_builder import create_ocr_translate_request
22 | from .core.request_handler import LensRequestHandler
23 | from .exceptions import LensException
24 | 
25 | if TYPE_CHECKING:
26 |     from .utils.lens_betterproto import (
27 |         LensOverlayServerResponse,
28 |         TextLayoutLine,
29 |         TextLayoutParagraph,
30 |         TextLayoutWord,
31 |         TranslationDataStatusCode,
32 |     )
33 | else:
34 |     from .utils.lens_betterproto import (
35 |         LensOverlayServerResponse,
36 |         TextLayoutLine,
37 |         TextLayoutParagraph,
38 |         TextLayoutWord,
39 |         TranslationDataStatusCode,
40 |     )
41 | 
42 | from .utils.font_manager import FontType, get_font
43 | 
44 | logger = logging.getLogger(__name__)
45 | 
46 | 
47 | class LensAPI:
48 |     """
49 |     Main class for interacting with the Google Lens API.
50 |     Provides methods for OCR, translation, and text block segmentation.
51 | """ 52 | 53 | def __init__( 54 | self, 55 | api_key: str = DEFAULT_API_KEY, 56 | client_region: Optional[str] = None, 57 | client_time_zone: Optional[str] = None, 58 | proxy: Optional[Union[str, Dict[str, httpx.AsyncBaseTransport]]] = None, 59 | timeout: int = 60, 60 | font_path: Optional[str] = None, 61 | font_size: Optional[int] = None, 62 | max_concurrent: int = 10, 63 | ): 64 | """ 65 | Initializes the LensAPI client. 66 | 67 | :param api_key: Your Google API key. Defaults to the library's built-in key. 68 | :param client_region: ISO 3166-1 alpha-2 country code (e.g., 'US', 'DE'). 69 | :param client_time_zone: Time zone name (e.g., 'America/New_York'). 70 | :param proxy: Proxy server URL or a dictionary for mounting transports. 71 | :param timeout: Request timeout in seconds. 72 | :param font_path: Path to a custom .ttf font file for text overlays. 73 | :param font_size: Font size for text overlays. 74 | :param max_concurrent: The maximum number of concurrent requests to prevent API abuse. Defaults to 5. 75 | """ 76 | self.request_handler = LensRequestHandler( 77 | api_key=api_key, proxy=proxy, timeout=timeout 78 | ) 79 | self.client_region = client_region 80 | self.client_time_zone = client_time_zone 81 | self.font_path = font_path 82 | self.font_size = font_size 83 | self._font_object: Optional[FontType] = None 84 | self._semaphore = asyncio.Semaphore(max_concurrent) 85 | if max_concurrent > 20: 86 | logger.warning( 87 | f"max_concurrent is set to {max_concurrent}, which is very high. " 88 | "This may lead to IP bans. Use with caution." 89 | ) 90 | 91 | def _get_font(self) -> FontType: 92 | """Lazily loads and returns the font object.""" 93 | if not self._font_object: 94 | self._font_object = get_font( 95 | font_path_override=self.font_path, font_size_override=self.font_size 96 | ) 97 | return self._font_object 98 | 99 | def _parse_line(self, line: "TextLayoutLine") -> Dict[str, Any]: 100 | """Parses a single TextLayoutLine into a structured dictionary.""" 101 | line_text = "".join( 102 | word.plain_text + (word.text_separator or "") for word in line.words 103 | ).strip() 104 | 105 | l_geom = line.geometry.bounding_box 106 | geometry_dict = { 107 | "center_x": l_geom.center_x, 108 | "center_y": l_geom.center_y, 109 | "width": l_geom.width, 110 | "height": l_geom.height, 111 | "angle_deg": l_geom.rotation_z * (180 / pi) if l_geom.rotation_z else 0.0, 112 | } 113 | 114 | return { 115 | "text": line_text, 116 | "geometry": geometry_dict, 117 | } 118 | 119 | def _parse_paragraph(self, paragraph: "TextLayoutParagraph") -> Dict[str, Any]: 120 | """Parses a single TextLayoutParagraph into a structured dictionary.""" 121 | paragraph_lines = [] 122 | for line in paragraph.lines: 123 | # Fixed Pylance issue: use 'or ""' to handle optional separator 124 | current_line_text = "".join( 125 | word.plain_text + (word.text_separator or "") for word in line.words 126 | ) 127 | paragraph_lines.append(current_line_text.strip()) 128 | 129 | full_paragraph_text = "\n".join(paragraph_lines) 130 | 131 | p_geom = paragraph.geometry.bounding_box 132 | geometry_dict = { 133 | "center_x": p_geom.center_x, 134 | "center_y": p_geom.center_y, 135 | "width": p_geom.width, 136 | "height": p_geom.height, 137 | "angle_deg": p_geom.rotation_z * (180 / pi) if p_geom.rotation_z else 0.0, 138 | } 139 | 140 | return { 141 | "text": full_paragraph_text, 142 | "lines": paragraph_lines, 143 | "geometry": geometry_dict, 144 | } 145 | 146 | def _extract_ocr_data_from_response( 147 | self, 148 | response_proto: 
"LensOverlayServerResponse", 149 | preserve_line_breaks: bool = True, 150 | output_format: Literal[ 151 | "full_text", "blocks", "lines", "detailed" 152 | ] = "full_text", 153 | ) -> Tuple[Union[str, List[Dict]], List[Dict[str, Any]]]: 154 | """ 155 | Extracts OCR data from the response. 156 | """ 157 | word_data_list: List[Dict[str, Any]] = [] 158 | if not ( 159 | response_proto.objects_response 160 | and response_proto.objects_response.text 161 | and response_proto.objects_response.text.text_layout 162 | ): 163 | return ("", []) if output_format == "full_text" else ([], []) 164 | 165 | text_layout = response_proto.objects_response.text.text_layout 166 | 167 | for paragraph in text_layout.paragraphs: 168 | for line in paragraph.lines: 169 | for word in line.words: 170 | word_data_list.append( 171 | { 172 | "word": word.plain_text, 173 | "separator": word.text_separator, 174 | "geometry": ( 175 | get_word_geometry_data(word.geometry.bounding_box) 176 | if word.geometry and word.geometry.bounding_box 177 | else None 178 | ), 179 | } 180 | ) 181 | 182 | detected_lang = getattr( 183 | response_proto.objects_response.text, "content_language", "N/A" 184 | ) 185 | logger.info( 186 | f"Extracted data for {len(word_data_list)} words. Detected language: {detected_lang}" 187 | ) 188 | 189 | if output_format == "detailed": 190 | detailed_blocks = [ 191 | self._parse_paragraph_detailed(p) for p in text_layout.paragraphs 192 | ] 193 | return detailed_blocks, word_data_list 194 | 195 | if output_format == "lines": 196 | line_blocks = [] 197 | for p in text_layout.paragraphs: 198 | for line in p.lines: 199 | line_blocks.append(self._parse_line(line)) 200 | return line_blocks, word_data_list 201 | 202 | if output_format == "blocks": 203 | text_blocks = [self._parse_paragraph(p) for p in text_layout.paragraphs] 204 | return text_blocks, word_data_list 205 | else: # 'full_text' 206 | if preserve_line_breaks: 207 | full_ocr_text = "\n".join( 208 | "\n".join(self._parse_paragraph(p)["lines"]) 209 | for p in text_layout.paragraphs 210 | ) 211 | else: 212 | text_parts = [ 213 | data["word"] + (data["separator"] or "") for data in word_data_list 214 | ] 215 | full_ocr_text = "".join(text_parts).strip() 216 | full_ocr_text = " ".join(full_ocr_text.split()) 217 | 218 | return full_ocr_text, word_data_list 219 | 220 | def _extract_translation_from_response( 221 | self, response_proto: "LensOverlayServerResponse" 222 | ) -> Optional[str]: 223 | """Extracts and consolidates all successful translations.""" 224 | all_translations = [] 225 | if ( 226 | response_proto.objects_response 227 | and response_proto.objects_response.deep_gleams 228 | ): 229 | for gleam in response_proto.objects_response.deep_gleams: 230 | if ( 231 | gleam.translation 232 | and gleam.translation.status.code 233 | == TranslationDataStatusCode.SUCCESS 234 | ): 235 | if gleam.translation.translation: 236 | all_translations.append(gleam.translation.translation) 237 | return "\n".join(all_translations).strip() or None 238 | 239 | def _parse_word_detailed(self, word: "TextLayoutWord") -> Dict[str, Any]: 240 | """Parses a single TextLayoutWord into a detailed dictionary including geometry.""" 241 | geometry_data = ( 242 | get_word_geometry_data(word.geometry.bounding_box) 243 | if word.geometry and word.geometry.bounding_box 244 | else None 245 | ) 246 | return { 247 | "text": word.plain_text, 248 | "separator": word.text_separator, 249 | "geometry": geometry_data, 250 | } 251 | 252 | def _parse_line_detailed(self, line: "TextLayoutLine") -> Dict[str, 
Any]: 253 | """Parses a TextLayoutLine into a detailed dictionary with words and geometry.""" 254 | line_text = "".join( 255 | word.plain_text + (word.text_separator or "") for word in line.words 256 | ).strip() 257 | 258 | l_geom = line.geometry.bounding_box 259 | geometry_dict = { 260 | "center_x": l_geom.center_x, 261 | "center_y": l_geom.center_y, 262 | "width": l_geom.width, 263 | "height": l_geom.height, 264 | "angle_deg": l_geom.rotation_z * (180 / pi) if l_geom.rotation_z else 0.0, 265 | } 266 | 267 | return { 268 | "text": line_text, 269 | "geometry": geometry_dict, 270 | "words": [self._parse_word_detailed(word) for word in line.words], 271 | } 272 | 273 | def _parse_paragraph_detailed( 274 | self, paragraph: "TextLayoutParagraph" 275 | ) -> Dict[str, Any]: 276 | """Parses a TextLayoutParagraph into a detailed dictionary with lines and geometry.""" 277 | full_paragraph_text = "\n".join( 278 | "".join( 279 | word.plain_text + (word.text_separator or "") for word in line.words 280 | ).strip() 281 | for line in paragraph.lines 282 | ) 283 | 284 | p_geom = paragraph.geometry.bounding_box 285 | geometry_dict = { 286 | "center_x": p_geom.center_x, 287 | "center_y": p_geom.center_y, 288 | "width": p_geom.width, 289 | "height": p_geom.height, 290 | "angle_deg": p_geom.rotation_z * (180 / pi) if p_geom.rotation_z else 0.0, 291 | } 292 | 293 | return { 294 | "text": full_paragraph_text, 295 | "geometry": geometry_dict, 296 | "lines": [self._parse_line_detailed(line) for line in paragraph.lines], 297 | } 298 | 299 | async def process_image( 300 | self, 301 | image_path: Any, 302 | ocr_language: Optional[str] = None, 303 | target_translation_language: Optional[str] = None, 304 | source_translation_language: Optional[str] = None, 305 | output_overlay_path: Optional[str] = None, 306 | new_session: bool = True, 307 | ocr_preserve_line_breaks: bool = True, 308 | output_format: Literal[ 309 | "full_text", "blocks", "lines", "detailed" 310 | ] = "full_text", 311 | ) -> Dict[str, Any]: 312 | """ 313 | Processes an image, performing OCR and optional translation. 314 | 315 | :param image_path: Path to a file (str or pathlib.Path), URL, bytes, PIL Image, or NumPy array. 316 | :param ocr_language: BCP 47 language code for OCR (e.g., 'en', 'ja'). 317 | :param target_translation_language: BCP 47 language code for translation target. 318 | :param source_translation_language: BCP 47 language code for translation source. 319 | :param output_overlay_path: Path to save the image with translated text overlaid. 320 | :param new_session: If True, starts a new server session for the request. 321 | :param ocr_preserve_line_breaks: If True and output_format is 'full_text', preserves line breaks. 322 | :param output_format: 'full_text' (default) returns a single string in 'ocr_text'. 323 | 'blocks' returns a list of dictionaries in 'text_blocks'. 324 | 'lines' returns a list of dictionaries in 'line_blocks', 325 | each representing a single recognized line with its geometry. 326 | :return: A dictionary containing the processing results. 
327 | """ 328 | # Acquire the semaphore before starting any processing 329 | async with self._semaphore: 330 | if isinstance(image_path, Path): 331 | image_path = str(image_path) 332 | 333 | if isinstance(image_path, str): 334 | logger.info(f"Processing image source: {image_path[:120]}...") 335 | else: 336 | logger.info( 337 | f"Processing image source of type: {type(image_path).__name__}" 338 | ) 339 | 340 | try: 341 | img_bytes, width, height, original_pil_img = ( 342 | await prepare_image_for_api(image_path) 343 | ) 344 | 345 | if new_session: 346 | self.request_handler.start_new_session() 347 | 348 | session_uuid_for_request, seq_id, img_seq_id = ( 349 | self.request_handler.get_next_sequence_ids_for_request( 350 | is_new_image_payload=new_session 351 | ) 352 | ) 353 | 354 | proto_payload, uuid_for_this_request = create_ocr_translate_request( 355 | image_bytes=img_bytes, 356 | width=width, 357 | height=height, 358 | ocr_language=ocr_language or DEFAULT_OCR_LANG, 359 | target_translation_language=target_translation_language, 360 | source_translation_language=source_translation_language, 361 | client_region=self.client_region or DEFAULT_CLIENT_REGION, 362 | client_time_zone=self.client_time_zone or DEFAULT_CLIENT_TIME_ZONE, 363 | session_uuid=session_uuid_for_request, 364 | sequence_id=seq_id, 365 | image_sequence_id=img_seq_id, 366 | routing_info=( 367 | self.request_handler.last_cluster_info.routing_info 368 | if self.request_handler.last_cluster_info 369 | else None 370 | ), 371 | ) 372 | 373 | response_proto = await self.request_handler.send_request( 374 | proto_payload, request_uuid_used=uuid_for_this_request 375 | ) 376 | 377 | ocr_result, word_data = self._extract_ocr_data_from_response( 378 | response_proto, ocr_preserve_line_breaks, output_format 379 | ) 380 | 381 | translated_text = ( 382 | self._extract_translation_from_response(response_proto) 383 | if target_translation_language 384 | else None 385 | ) 386 | 387 | if output_overlay_path and translated_text: 388 | word_boxes_norm = [] 389 | for data in word_data: 390 | geom = data.get("geometry") 391 | if geom: 392 | x1 = geom["center_x"] - geom["width"] / 2 393 | y1 = geom["center_y"] - geom["height"] / 2 394 | x2 = geom["center_x"] + geom["width"] / 2 395 | y2 = geom["center_y"] + geom["height"] / 2 396 | word_boxes_norm.append((x1, y1, x2, y2)) 397 | 398 | overlay_image = draw_overlay_on_image( 399 | original_pil_img, 400 | word_boxes_norm, 401 | translated_text, 402 | self._get_font(), 403 | ) 404 | try: 405 | overlay_image.save(output_overlay_path) 406 | logger.info( 407 | f"Image with overlay saved to: {output_overlay_path}" 408 | ) 409 | except Exception as e_save: 410 | logger.error( 411 | f"Error saving overlay image to '{output_overlay_path}': {e_save}" 412 | ) 413 | elif output_overlay_path: 414 | logger.warning( 415 | f"Overlay output path '{output_overlay_path}' specified, but no translated text available." 
416 | ) 417 | 418 | final_result = { 419 | "translated_text": translated_text, 420 | "word_data": word_data, 421 | "raw_response_objects": response_proto.objects_response, 422 | } 423 | 424 | if output_format == "detailed": 425 | final_result["detailed_blocks"] = ocr_result 426 | elif output_format == "blocks": 427 | final_result["text_blocks"] = ocr_result 428 | elif output_format == "lines": 429 | final_result["line_blocks"] = ocr_result 430 | else: 431 | final_result["ocr_text"] = ocr_result 432 | 433 | return final_result 434 | 435 | except LensException as e: 436 | logger.error(f"LensAPI processing error: {e}", exc_info=True) 437 | raise 438 | except Exception as e: 439 | logger.error(f"Unexpected error in LensAPI: {e}", exc_info=True) 440 | raise LensException(f"Unexpected error in LensAPI: {e}") from e 441 | -------------------------------------------------------------------------------- /experiments/reverse.py: -------------------------------------------------------------------------------- 1 | # This is the first prototype of the Google Lens API reverse. 2 | # It's simpler, and without a lot of garbage, which is suitable for your projects if you want to rewrite it into another language. 3 | import asyncio 4 | import io 5 | import json 6 | import logging 7 | import os 8 | import sys 9 | import time 10 | from urllib.parse import parse_qs, urlparse 11 | 12 | import httpx 13 | 14 | # --- JSON Parsing Setup --- 15 | try: 16 | import json5 17 | 18 | json_loader = json5.loads 19 | logging.info("Using json5 for parsing.") 20 | except ImportError: 21 | json_loader = json.loads 22 | logging.info("json5 not found, using standard json module.") 23 | 24 | # --- Logging Setup --- 25 | logging.basicConfig( 26 | level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" 27 | ) 28 | 29 | # --- Constants --- 30 | LENS_UPLOAD_ENDPOINT = "https://lens.google.com/v3/upload" 31 | LENS_METADATA_ENDPOINT = "https://lens.google.com/qfmetadata" 32 | HEADERS = { 33 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 34 | "Accept-Language": "ru", 35 | "Cache-Control": "max-age=0", 36 | "Sec-Ch-Ua": '"Not-A.Brand";v="8", "Chromium";v="135", "Google Chrome";v="135"', 37 | "Sec-Ch-Ua-Mobile": "?0", 38 | "Sec-Ch-Ua-Platform": '"Windows"', 39 | "Upgrade-Insecure-Requests": "1", 40 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", 41 | "Origin": "https://www.google.com", 42 | "Referer": "https://www.google.com/", 43 | "Sec-Fetch-Site": "same-site", 44 | "Sec-Fetch-Mode": "navigate", 45 | "Sec-Fetch-Dest": "document", 46 | "Sec-Fetch-User": "?1", 47 | "Priority": "u=0, i", 48 | } 49 | COOKIE_FILE = "cookies_lens_test.json" 50 | 51 | # --- Helper Functions --- 52 | 53 | 54 | async def read_image_data(image_path): 55 | """Reads image data from file.""" 56 | try: 57 | with open(image_path, "rb") as f: 58 | return f.read() 59 | except FileNotFoundError: 60 | logging.error(f"Image not found: {image_path}") 61 | return None 62 | except Exception as e: 63 | logging.error(f"Error reading image {image_path}: {e}") 64 | return None 65 | 66 | 67 | def extract_ids_from_url(url_string): 68 | """Extracts vsrid and lsessionid from URL.""" 69 | try: 70 | parsed_url = urlparse(url_string) 71 | query_params = parse_qs(parsed_url.query) 72 | vsrid = query_params.get("vsrid", [None])[0] 73 | lsessionid = query_params.get("lsessionid", [None])[0] 
74 | return vsrid, lsessionid 75 | except Exception as e: 76 | logging.error(f"Error extracting IDs from URL {url_string}: {e}") 77 | return None, None 78 | 79 | 80 | async def save_cookies(cookies, cookie_file): 81 | """Saves cookies to JSON file.""" 82 | try: 83 | cookies_dict = {} 84 | cookie_jar = getattr(cookies, "jar", cookies) 85 | if hasattr(cookie_jar, "items"): 86 | for name, value in cookie_jar.items(): 87 | if isinstance(value, str): 88 | cookies_dict[name] = value 89 | elif hasattr(cookie_jar, "__iter__"): 90 | for cookie in cookie_jar: 91 | if hasattr(cookie, "name") and hasattr(cookie, "value"): 92 | cookies_dict[cookie.name] = cookie.value 93 | else: 94 | logging.warning( 95 | f"Could not determine how to iterate cookies object: {type(cookies)}" 96 | ) 97 | return 98 | 99 | with open(cookie_file, "w") as f: 100 | json.dump(cookies_dict, f, indent=2) 101 | logging.debug(f"Cookies saved to {cookie_file}") 102 | except Exception as e: 103 | logging.error(f"Error saving cookies: {e}") 104 | 105 | 106 | async def load_cookies(cookie_file): 107 | """Loads cookies from JSON file.""" 108 | try: 109 | if os.path.exists(cookie_file): 110 | with open(cookie_file, "r") as f: 111 | cookies_dict = json.load(f) 112 | logging.debug(f"Cookies loaded from {cookie_file}") 113 | return cookies_dict 114 | except (json.JSONDecodeError, FileNotFoundError) as e: 115 | logging.warning( 116 | f"Error loading cookies from {cookie_file}: {e}. Ignoring cookies." 117 | ) 118 | except Exception as e: 119 | logging.warning( 120 | f"Unexpected error loading cookies from {cookie_file}: {e}. Ignoring cookies." 121 | ) 122 | return {} 123 | 124 | 125 | def adaptive_parse_text_and_language(metadata_json): 126 | """ 127 | Adaptively parses JSON to extract language, text blocks, and word annotations. 128 | """ 129 | language = None 130 | all_word_annotations = [] 131 | reconstructed_blocks = [] 132 | 133 | try: 134 | if not isinstance(metadata_json, list) or not metadata_json: 135 | logging.error( 136 | "Invalid JSON structure: metadata_json is not a non-empty list." 137 | ) 138 | return None, [], [] 139 | response_container = next( 140 | ( 141 | item 142 | for item in metadata_json 143 | if isinstance(item, list) 144 | and item 145 | and item[0] == "fetch_query_formulation_metadata_response" 146 | ), 147 | None, 148 | ) 149 | if response_container is None: 150 | logging.error( 151 | "Could not find 'fetch_query_formulation_metadata_response' container." 
152 | ) 153 | return None, [], [] 154 | 155 | # --- Language Extraction --- 156 | try: 157 | if len(response_container) > 2 and isinstance(response_container[2], list): 158 | lang_section = response_container[2] 159 | language = next( 160 | ( 161 | element 162 | for element in lang_section 163 | if isinstance(element, str) and len(element) == 2 164 | ), 165 | None, 166 | ) 167 | if language: 168 | logging.debug(f"Found language code: '{language}'") 169 | except (IndexError, TypeError, StopIteration): 170 | logging.warning("Could not find language code in expected structure.") 171 | 172 | # --- Text/Word Extraction --- 173 | segments_iterable = None 174 | possible_paths_to_segments_list = [ 175 | lambda rc: rc[2][0][0][0], 176 | lambda rc: rc[1][0][0][0], 177 | lambda rc: rc[2][0][0], 178 | ] 179 | path_names = ["[2][0][0][0]", "[1][0][0][0]", "[2][0][0]"] 180 | 181 | for i, path_func in enumerate(possible_paths_to_segments_list): 182 | path_name = path_names[i] 183 | try: 184 | candidate_iterable = path_func(response_container) 185 | if ( 186 | isinstance(candidate_iterable, list) 187 | and candidate_iterable 188 | and isinstance(candidate_iterable[0], list) 189 | ): 190 | try: 191 | first_segment = candidate_iterable[0] 192 | if len(first_segment) > 1 and isinstance( 193 | first_segment[1], list 194 | ): 195 | if ( 196 | first_segment[1] 197 | and isinstance(first_segment[1][0], list) 198 | and len(first_segment[1][0]) > 0 199 | and isinstance(first_segment[1][0][0], list) 200 | ): 201 | segments_iterable = candidate_iterable 202 | logging.debug( 203 | f"Segments list identified at path ending with {path_name}" 204 | ) 205 | break 206 | except (IndexError, TypeError): 207 | pass 208 | except (IndexError, TypeError): 209 | pass 210 | 211 | if segments_iterable is None: 212 | logging.error( 213 | f"Could not identify valid text segments list using paths {path_names}." 214 | ) 215 | return language, [], [] 216 | 217 | for segment_list in segments_iterable: 218 | current_block_word_annotations = [] 219 | block_text_builder = io.StringIO() 220 | last_word_ends_with_space = False 221 | 222 | if not isinstance(segment_list, list): 223 | logging.warning( 224 | f"Skipping segment: Expected list, got {type(segment_list)}." 
225 | ) 226 | continue 227 | 228 | try: 229 | if len(segment_list) > 1 and isinstance(segment_list[1], list): 230 | word_groups_list = segment_list[1] 231 | 232 | for group_count, word_group in enumerate(word_groups_list, 1): 233 | try: 234 | if ( 235 | isinstance(word_group, list) 236 | and len(word_group) > 0 237 | and isinstance(word_group[0], list) 238 | and isinstance(word_group[0][0], list) 239 | ): 240 | 241 | word_list = word_group[0] 242 | 243 | if ( 244 | group_count > 1 245 | and block_text_builder.tell() > 0 246 | and not last_word_ends_with_space 247 | ): 248 | block_text_builder.write(" ") 249 | last_word_ends_with_space = True 250 | 251 | for word_info in word_list: 252 | try: 253 | if ( 254 | isinstance(word_info, list) 255 | and len(word_info) > 3 256 | and isinstance(word_info[1], str) 257 | and isinstance(word_info[2], str) 258 | and isinstance(word_info[3], list) 259 | and word_info[3] 260 | and isinstance(word_info[3][0], list) 261 | ): 262 | 263 | text = word_info[1] 264 | space_indicator = word_info[2] 265 | bbox = word_info[3][0] 266 | 267 | current_block_word_annotations.append( 268 | {"text": text, "bbox": bbox} 269 | ) 270 | 271 | block_text_builder.write(text) 272 | block_text_builder.write(space_indicator) 273 | last_word_ends_with_space = ( 274 | space_indicator == " " 275 | ) 276 | 277 | except (IndexError, TypeError): 278 | pass 279 | except (IndexError, TypeError): 280 | pass 281 | else: 282 | logging.warning("Word groups list structure [1] not found/invalid.") 283 | except (IndexError, TypeError): 284 | logging.error("Error processing segment structure.") 285 | except Exception as e: 286 | logging.error(f"Unexpected error processing segment: {e}") 287 | 288 | reconstructed_text = block_text_builder.getvalue().rstrip(" ") 289 | block_text_builder.close() 290 | 291 | if reconstructed_text or current_block_word_annotations: 292 | reconstructed_blocks.append(reconstructed_text) 293 | all_word_annotations.extend(current_block_word_annotations) 294 | 295 | except Exception as e: 296 | logging.error( 297 | f"Critical error during adaptive text extraction: {e}", exc_info=True 298 | ) 299 | return language, reconstructed_blocks, all_word_annotations 300 | 301 | logging.info( 302 | f"Adaptive parsing complete. Language: '{language}'. Text blocks found: {len(reconstructed_blocks)}. Total word annotations: {len(all_word_annotations)}." 
303 | ) 304 | return language, reconstructed_blocks, all_word_annotations 305 | 306 | 307 | async def scan_image(image_path): 308 | """Scans image via Google Lens, extracts text, language, and coordinates.""" 309 | logging.info(f"Starting image scan: {image_path}") 310 | image_data = await read_image_data(image_path) 311 | if not image_data: 312 | return None, "Failed to read image data" 313 | 314 | filename = os.path.basename(image_path) 315 | _, ext = os.path.splitext(filename.lower()) 316 | content_type = "image/jpeg" 317 | if ext == ".png": 318 | content_type = "image/png" 319 | elif ext == ".webp": 320 | content_type = "image/webp" 321 | elif ext == ".gif": 322 | content_type = "image/gif" 323 | logging.debug(f"Using content type: {content_type}") 324 | 325 | files = {"encoded_image": (filename, image_data, content_type)} 326 | params_upload = { 327 | "hl": "ru", 328 | "re": "av", 329 | "vpw": "1903", 330 | "vph": "953", 331 | "ep": "gsbubb", 332 | "st": str(int(time.time() * 1000)), 333 | } 334 | 335 | loaded_cookies = await load_cookies(COOKIE_FILE) 336 | limits = httpx.Limits(max_keepalive_connections=5, max_connections=10) 337 | timeout = httpx.Timeout(30.0, connect=10.0) 338 | 339 | async with httpx.AsyncClient( 340 | cookies=loaded_cookies, 341 | follow_redirects=True, 342 | timeout=timeout, 343 | limits=limits, 344 | http2=True, 345 | verify=True, 346 | ) as client: 347 | try: 348 | # --- 1. Upload Image to Lens --- 349 | logging.debug(f"POST request to {LENS_UPLOAD_ENDPOINT}") 350 | response_upload = await client.post( 351 | LENS_UPLOAD_ENDPOINT, headers=HEADERS, files=files, params=params_upload 352 | ) 353 | await save_cookies(client.cookies, COOKIE_FILE) 354 | response_upload.raise_for_status() 355 | 356 | final_url = str(response_upload.url) 357 | 358 | # --- 2. Extract Session IDs from URL --- 359 | vsrid, lsessionid = extract_ids_from_url(final_url) 360 | if not vsrid or not lsessionid: 361 | logging.error( 362 | "Failed to extract vsrid or lsessionid from upload redirect URL." 363 | ) 364 | return None, "Failed to get session IDs from upload response" 365 | 366 | # --- 3. Fetch Metadata from Lens --- 367 | metadata_params = { 368 | "vsrid": vsrid, 369 | "lsessionid": lsessionid, 370 | "hl": params_upload["hl"], 371 | "qf": "CAI%3D", 372 | "st": str(int(time.time() * 1000)), 373 | "vpw": params_upload["vpw"], 374 | "vph": params_upload["vph"], 375 | "source": "lens", 376 | } 377 | metadata_headers = HEADERS.copy() 378 | metadata_headers.update( 379 | { 380 | "Accept": "*/*", 381 | "Referer": final_url, 382 | "Sec-Fetch-Site": "same-origin", 383 | "Sec-Fetch-Mode": "cors", 384 | "Sec-Fetch-Dest": "empty", 385 | "Priority": "u=1, i", 386 | } 387 | ) 388 | metadata_headers.pop("Upgrade-Insecure-Requests", None) 389 | metadata_headers.pop("Sec-Fetch-User", None) 390 | metadata_headers.pop("Cache-Control", None) 391 | metadata_headers.pop("Origin", None) 392 | 393 | metadata_url_obj = httpx.URL(LENS_METADATA_ENDPOINT, params=metadata_params) 394 | logging.debug(f"GET request to {str(metadata_url_obj)}") 395 | response_metadata = await client.get( 396 | metadata_url_obj, headers=metadata_headers 397 | ) 398 | await save_cookies(client.cookies, COOKIE_FILE) 399 | response_metadata.raise_for_status() 400 | 401 | # --- 4. 
Parse Metadata Response --- 402 | response_text = response_metadata.text 403 | if response_text.startswith(")]}'\n"): 404 | response_text = response_text[5:] 405 | elif response_text.startswith(")]}'"): 406 | response_text = response_text[4:] 407 | 408 | try: 409 | metadata_json = json_loader(response_text) 410 | 411 | # --- 5. Extract Data using Adaptive Parser --- 412 | language, reconstructed_blocks, all_word_annotations = ( 413 | adaptive_parse_text_and_language(metadata_json) 414 | ) 415 | full_text = "\n".join(reconstructed_blocks) 416 | 417 | result_data = { 418 | "text": full_text, 419 | "language": language if language else "und", 420 | "text_with_coordinates": json.dumps( 421 | all_word_annotations, ensure_ascii=False 422 | ), # JSON as string 423 | } 424 | return result_data, metadata_json 425 | 426 | except Exception as e_parse: 427 | logging.error( 428 | f"Error parsing JSON or extracting text: {e_parse}", exc_info=True 429 | ) 430 | return None, response_metadata.text 431 | 432 | except httpx.HTTPStatusError as e: 433 | logging.error( 434 | f"HTTP error: {e.response.status_code} for URL {e.request.url}" 435 | ) 436 | return None, f"HTTP Error {e.response.status_code}" 437 | except httpx.RequestError as e: 438 | logging.error(f"Request error: {e}") 439 | return None, f"Request Error: {e}" 440 | except Exception as e: 441 | logging.error(f"Unexpected error in scan_image: {e}", exc_info=True) 442 | return None, f"Unexpected Error: {e}" 443 | 444 | 445 | async def main(): 446 | if len(sys.argv) < 2: 447 | print(f"Usage: python {sys.argv[0]} ") 448 | sys.exit(1) 449 | 450 | image_path = sys.argv[1] 451 | if not os.path.isfile(image_path): 452 | print(f"Error: File not found: {image_path}") 453 | sys.exit(1) 454 | 455 | print(f"Starting Google Lens scan for: {image_path}") 456 | start_total_time = time.time() 457 | 458 | result_dict, raw_data = await scan_image(image_path) 459 | 460 | end_total_time = time.time() 461 | logging.info( 462 | f"Total scan_image execution time: {end_total_time - start_total_time:.2f} sec." 463 | ) 464 | 465 | if result_dict: 466 | print("\n--- Google Lens Scan Result ---") 467 | print( 468 | json.dumps(result_dict, indent=2, ensure_ascii=False) 469 | ) # Output result as JSON 470 | print("------------------------------") 471 | else: 472 | print("\nGoogle Lens scan failed.") 473 | logging.error(f"Scan failed. 
-------------------------------------------------------------------------------- /src/chrome_lens_py/cli/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import json 4 | import logging 5 | import os 6 | import sys 7 | 8 | from rich.console import Console 9 | from rich.logging import RichHandler 10 | from rich.table import Table 11 | from rich.text import Text 12 | 13 | from ..api import LensAPI 14 | from ..constants import ( 15 | DEFAULT_API_KEY, 16 | DEFAULT_CLIENT_REGION, 17 | DEFAULT_CLIENT_TIME_ZONE, 18 | DEFAULT_CONFIG_FILENAME, 19 | ) 20 | from ..exceptions import LensConfigError, LensException 21 | from ..utils.config_manager import ( 22 | build_app_config, 23 | get_default_config_dir, 24 | update_config_file_from_cli, 25 | ) 26 | from ..utils.general import is_image_file_supported 27 | from ..utils.sharex import copy_to_clipboard 28 | 29 | console = Console() 30 | 31 | 32 | def setup_logging(level_str: str = "WARNING"): 33 | log_level = getattr(logging, level_str.upper(), logging.WARNING) 34 | log_format = ( 35 | "[%(levelname)s] %(name)s:%(funcName)s:%(lineno)d - %(message)s" 36 | if log_level <= logging.DEBUG 37 | else "%(message)s" 38 | ) 39 | logging.basicConfig( 40 | level=log_level, 41 | format=log_format, 42 | handlers=[ 43 | RichHandler( 44 | console=console, 45 | show_time=False, 46 | show_level=log_level <= logging.INFO, 47 | show_path=log_level <= logging.DEBUG, 48 | markup=True, 49 | rich_tracebacks=True, 50 | ) 51 | ], 52 | ) 53 | if log_level > logging.DEBUG: 54 | logging.getLogger("httpx").setLevel(logging.WARNING) 55 | logging.debug(f"Logging level set to {level_str.upper()}") 56 | 57 | 58 | def print_help(): 59 | console.print("\n[bold cyan]Google Lens CLI (chrome-lens-py)[/bold cyan]") 60 | console.print("Performs OCR and optional translation on an image.") 61 | 62 | table = Table(show_header=False, box=None, padding=(0, 2)) 63 | table.add_column(style="green") 64 | table.add_column() 65 | table.add_row("Usage:", "lens_scan <image_source> [ocr_lang] [options]") 66 | table.add_row("\n[bold]Arguments:[/bold]") 67 | table.add_row(" image_source", "Path to an image file, a URL, or a directory.") 68 | table.add_row( 69 | " ocr_lang", 70 | "BCP 47 language code for OCR (e.g., 'en', 'ja'). 
If omitted, auto-detection is attempted.", 71 | ) 72 | table.add_row("\n[bold]Translation Options:[/bold]") 73 | table.add_row( 74 | " -t, --translate TARGET_LANG", 75 | "Target language for translation (e.g., 'en', 'ru').", 76 | ) 77 | table.add_row( 78 | " --translate-from SOURCE_LANG", 79 | "Source language for translation (auto-detected if omitted).", 80 | ) 81 | table.add_row( 82 | " -to, --translate-out FILE_PATH", 83 | "Save the image with translated text overlaid.", 84 | ) 85 | table.add_row("\n[bold]Output and Config Options:[/bold]") 86 | table.add_row( 87 | " -b, --output-blocks", 88 | "Output OCR text as segmented blocks (useful for comics).", 89 | ) 90 | table.add_row( 91 | " -ol, --output-lines", 92 | "Output OCR text as individual lines with their geometry.", 93 | ) 94 | table.add_row( 95 | " --get-coords", 96 | "Output recognized words with their coordinates in JSON format.", 97 | ) 98 | table.add_row( 99 | " -q, --quiet", 100 | "Suppress informational messages and headers, printing only the final result data.", 101 | ) 102 | table.add_row( 103 | " -sx, --sharex", "Copy the result (translation or OCR) to the clipboard." 104 | ) 105 | table.add_row( 106 | " --ocr-single-line", 107 | "Join all OCR text into a single line (preserves line breaks by default).", 108 | ) 109 | table.add_row( 110 | " --config-file FILE_PATH", "Path to a custom JSON configuration file." 111 | ) 112 | table.add_row( 113 | " --update-config", "Update the default config file with CLI arguments." 114 | ) 115 | table.add_row(" --font FONT_PATH", "Path to a .ttf font file for the overlay.") 116 | table.add_row(" --font-size SIZE", "Font size for the overlay (default: 20).") 117 | table.add_row("\n[bold]Advanced & Debug Options:[/bold]") 118 | table.add_row(" --api-key KEY", "Google Cloud API key (overrides config).") 119 | table.add_row( 120 | " --proxy URL", 121 | "Proxy server URL (e.g., http://user:pass@host:port, socks5://host:port).", 122 | ) 123 | table.add_row(" --timeout SECONDS", "Request timeout in seconds (default: 60).") 124 | table.add_row( 125 | " --concurrency N", 126 | "Set the maximum number of concurrent requests (default: 5).", 127 | ) 128 | table.add_row( 129 | " --client-region REGION", 130 | f"Client region code (default: '{DEFAULT_CLIENT_REGION}').", 131 | ) 132 | table.add_row( 133 | " --client-time-zone TZ", 134 | f"Client time zone ID (default: '{DEFAULT_CLIENT_TIME_ZONE}').", 135 | ) 136 | table.add_row( 137 | " -l, --logging-level LEVEL", 138 | "Set logging level (DEBUG, INFO, WARNING, ERROR).", 139 | ) 140 | table.add_row(" -h, --help", "Show this help message and exit.") 141 | console.print(table) 142 | 143 | 144 | async def cli_main(): 145 | parser = argparse.ArgumentParser(description="Google Lens CLI", add_help=False) 146 | # Positional 147 | parser.add_argument( 148 | "image_source", nargs="?", help="Path to the image file, a URL, or a directory." 149 | ) 150 | parser.add_argument( 151 | "ocr_lang", nargs="?", default=None, help="BCP 47 code for OCR." 
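# Illustrative invocations (options as defined below; paths are examples):
#   lens_scan photo.jpg ja -t en    # OCR Japanese text, translate to English
#   lens_scan ./pages -b -sx        # process a directory, copy block output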
152 | ) 153 | # Translation 154 | parser.add_argument("-t", "--translate", dest="target_lang") 155 | parser.add_argument("--translate-from", dest="source_lang") 156 | parser.add_argument("-to", "--translate-out", dest="output_overlay_path") 157 | # Output & Config 158 | parser.add_argument( 159 | "-b", 160 | "--output-blocks", 161 | action="store_true", 162 | help="Output OCR text as segmented blocks.", 163 | ) 164 | parser.add_argument( 165 | "-ol", 166 | "--output-lines", 167 | action="store_true", 168 | help="Output OCR text as individual lines.", 169 | ) 170 | parser.add_argument( 171 | "--get-coords", 172 | action="store_true", 173 | help="Output word coordinates in JSON format.", 174 | ) 175 | parser.add_argument( 176 | "-q", 177 | "--quiet", 178 | action="store_true", 179 | help="Suppress informational messages, printing only result data.", 180 | ) 181 | parser.add_argument("-sx", "--sharex", action="store_true") 182 | parser.add_argument( 183 | "--ocr-single-line", 184 | action="store_false", 185 | dest="ocr_preserve_line_breaks", 186 | default=None, 187 | ) 188 | parser.add_argument("--config-file", dest="config_file_path_override") 189 | parser.add_argument("--update-config", action="store_true") 190 | parser.add_argument("--font", dest="font_path") 191 | parser.add_argument("--font-size", type=int) 192 | # Advanced 193 | parser.add_argument("--api-key") 194 | parser.add_argument("--proxy") 195 | parser.add_argument("--timeout", type=int) 196 | parser.add_argument( 197 | "--concurrency", 198 | type=int, 199 | default=5, 200 | help="Maximum number of concurrent requests.", 201 | ) 202 | parser.add_argument("--client-region") 203 | parser.add_argument("--client-time-zone") 204 | # Meta 205 | parser.add_argument("-l", "--logging-level", dest="logging_level") 206 | parser.add_argument("-h", "--help", action="store_true") 207 | 208 | args = parser.parse_args() 209 | 210 | MAX_CONCURRENCY_HARD_LIMIT = 30 211 | CONCURRENCY_WARNING_THRESHOLD = 20 212 | 213 | if args.concurrency > MAX_CONCURRENCY_HARD_LIMIT: 214 | console.print( 215 | f"[bold red]Error:[/bold red] The concurrency value cannot be greater than {MAX_CONCURRENCY_HARD_LIMIT}." 216 | ) 217 | console.print("This is a security measure to prevent IP blocking.") 218 | sys.exit(1) 219 | 220 | if args.concurrency > CONCURRENCY_WARNING_THRESHOLD: 221 | console.print( 222 | f"[bold yellow]Warning:[/bold yellow] High concurrency value ({args.concurrency}) set." 223 | ) 224 | console.print( 225 | "This may result in a temporary block by Google. Use with caution." 226 | ) 227 | 228 | if args.help: 229 | print_help() 230 | return 231 | if not args.image_source: 232 | console.print( 233 | "[bold red]Error:[/bold red] The 'image_source' argument is required.\n" 234 | ) 235 | print_help() 236 | sys.exit(1) 237 | 238 | # Validate mutually exclusive output formats 239 | output_modes = [args.output_blocks, args.get_coords, args.output_lines] 240 | if sum(output_modes) > 1: 241 | console.print( 242 | "[bold red]Error:[/bold red] --output-blocks, --output-lines, and --get-coords cannot be used together." 
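# Illustrative config.json (keys inferred from the app_config lookups below;
# every field is optional):
#   {"api_key": "...", "proxy": "socks5://host:port", "timeout": 60,
#    "client_region": "US", "logging_level": "INFO"}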
243 | ) 244 | sys.exit(1) 245 | 246 | default_config_path = os.path.join( 247 | get_default_config_dir(), DEFAULT_CONFIG_FILENAME 248 | ) 249 | config_file_to_load = args.config_file_path_override or default_config_path 250 | 251 | try: 252 | app_config = build_app_config(vars(args), config_file_to_load) 253 | except LensConfigError as e: 254 | console.print(f"[bold red]Configuration Error:[/bold red] {e}") 255 | sys.exit(1) 256 | 257 | setup_logging(app_config.get("logging_level", "WARNING")) 258 | 259 | if os.path.exists(config_file_to_load): 260 | logging.info(f"Using config file: {config_file_to_load}") 261 | elif args.config_file_path_override: 262 | logging.warning( 263 | f"Specified config file not found: {args.config_file_path_override}" 264 | ) 265 | 266 | image_sources = [] 267 | if os.path.isdir(args.image_source): 268 | if not args.quiet: 269 | console.print(f"Processing directory: [cyan]{args.image_source}[/cyan]") 270 | for filename in sorted(os.listdir(args.image_source)): 271 | full_path = os.path.join(args.image_source, filename) 272 | if is_image_file_supported(full_path): 273 | image_sources.append(full_path) 274 | if not image_sources: 275 | console.print( 276 | f"[bold red]Error:[/bold red] No supported image files found in directory '{args.image_source}'." 277 | ) 278 | sys.exit(1) 279 | else: 280 | if not is_image_file_supported(args.image_source): 281 | console.print( 282 | f"[bold red]Error:[/bold red] Source '{args.image_source}' is not a valid URL or supported image file." 283 | ) 284 | sys.exit(1) 285 | image_sources.append(args.image_source) 286 | 287 | if args.update_config: 288 | if args.config_file_path_override: 289 | console.print( 290 | "[bold yellow]Warning:[/bold yellow] --update-config only affects the default config file." 
291 | ) 292 | else: 293 | try: 294 | update_config_file_from_cli(vars(args), default_config_path) 295 | except LensConfigError as e: 296 | console.print(f"[bold red]Error updating config:[/bold red] {e}") 297 | 298 | api = LensAPI( 299 | api_key=app_config.get("api_key", DEFAULT_API_KEY), 300 | client_region=app_config.get("client_region"), 301 | client_time_zone=app_config.get("client_time_zone"), 302 | proxy=app_config.get("proxy"), 303 | timeout=app_config.get("timeout", 60), 304 | font_path=app_config.get("font_path"), 305 | font_size=app_config.get("font_size"), 306 | max_concurrent=args.concurrency, 307 | ) 308 | 309 | try: 310 | output_format = "full_text" 311 | if args.output_blocks: 312 | output_format = "blocks" 313 | elif args.output_lines: 314 | output_format = "lines" 315 | 316 | results_buffer = {} 317 | next_to_print = 0 318 | results_ready = asyncio.Condition() 319 | 320 | async def worker(queue): 321 | while True: 322 | index, path = await queue.get() 323 | try: 324 | 325 | try: 326 | result = await api.process_image( 327 | image_path=path, 328 | ocr_language=args.ocr_lang, 329 | target_translation_language=args.target_lang, 330 | source_translation_language=args.source_lang, 331 | output_overlay_path=args.output_overlay_path, 332 | ocr_preserve_line_breaks=app_config.get( 333 | "ocr_preserve_line_breaks", True 334 | ), 335 | output_format=output_format, 336 | ) 337 | except Exception as e: 338 | result = e 339 | 340 | async with results_ready: 341 | results_buffer[index] = result 342 | results_ready.notify() 343 | 344 | finally: 345 | queue.task_done() 346 | 347 | job_queue = asyncio.Queue() 348 | for i, path in enumerate(image_sources): 349 | job_queue.put_nowait((i, path)) 350 | 351 | worker_tasks = [ 352 | asyncio.create_task(worker(job_queue)) for _ in range(args.concurrency) 353 | ] 354 | 355 | while next_to_print < len(image_sources): 356 | async with results_ready: 357 | await results_ready.wait_for(lambda: next_to_print in results_buffer) 358 | 359 | result = results_buffer.pop(next_to_print) 360 | image_path = image_sources[next_to_print] 361 | 362 | if isinstance(result, Exception): 363 | console.print( 364 | f"\n- [bold red]({next_to_print + 1}/{len(image_sources)}) Error for: {os.path.basename(image_path)}[/bold red] -" 365 | ) 366 | console.print(f"[red]{result}[/red]") 367 | next_to_print += 1 368 | continue 369 | 370 | if len(image_sources) > 1 and not args.quiet: 371 | console.print( 372 | f"\n- [bold green]({next_to_print + 1}/{len(image_sources)}) Result for: {os.path.basename(image_path)}[/bold green] -" 373 | ) 374 | 375 | if args.get_coords: 376 | word_data = result.get("word_data") 377 | if not word_data: 378 | console.print("[]") 379 | next_to_print += 1 380 | continue # Continue to next image in batch 381 | 382 | processed_coords = [] 383 | for data in word_data: 384 | geom = data.get("geometry") 385 | processed_coords.append( 386 | { 387 | data["word"]: ( 388 | { 389 | "center_x": round(geom["center_x"], 4), 390 | "center_y": round(geom["center_y"], 4), 391 | "width": round(geom["width"], 4), 392 | "height": round(geom["height"], 4), 393 | "angle_deg": round(geom["angle_deg"], 2), 394 | } 395 | if geom 396 | else None 397 | ) 398 | } 399 | ) 400 | 401 | console.print( 402 | json.dumps(processed_coords, indent=2, ensure_ascii=False) 403 | ) 404 | 405 | elif args.output_lines: 406 | line_blocks = result.get("line_blocks", []) 407 | if not args.quiet: 408 | console.print( 409 | f"\n[bold green]OCR Results ({len(line_blocks)} lines):[/bold green]" 410 
| ) 411 | if not line_blocks and not args.quiet: 412 | console.print("No lines found.") 413 | 414 | for j, line in enumerate(line_blocks): 415 | if not args.quiet: 416 | console.print(f"\n--- [cyan]Line #{j+1}[/cyan] ---") 417 | console.print(Text(line.get("text", ""))) 418 | 419 | translated_text = result.get("translated_text") 420 | if translated_text: 421 | if not args.quiet: 422 | console.print( 423 | "\n[bold green]Translated Text (Full):[/bold green]" 424 | ) 425 | console.print(Text(translated_text)) 426 | 427 | elif args.output_blocks: 428 | text_blocks = result.get("text_blocks", []) 429 | if not args.quiet: 430 | console.print( 431 | f"\n[bold green]OCR Results ({len(text_blocks)} blocks):[/bold green]" 432 | ) 433 | if not text_blocks and not args.quiet: 434 | console.print("No text blocks found.") 435 | 436 | for j, block in enumerate(text_blocks): 437 | if not args.quiet: 438 | console.print(f"\n--- [cyan]Block #{j+1}[/cyan] ---") 439 | console.print(Text(block.get("text", ""))) 440 | 441 | translated_text = result.get("translated_text") 442 | if translated_text: 443 | if not args.quiet: 444 | console.print( 445 | "\n[bold green]Translated Text (Full):[/bold green]" 446 | ) 447 | console.print(Text(translated_text)) 448 | 449 | else: # Default 'full_text' output 450 | ocr_text = result.get("ocr_text") 451 | if ocr_text: 452 | if not args.quiet: 453 | console.print("\n[bold green]OCR Results:[/bold green]") 454 | console.print(Text(ocr_text)) 455 | elif not args.quiet: 456 | console.print("\n[bold green]OCR Results:[/bold green]") 457 | console.print("No OCR text found.") 458 | 459 | translated_text = result.get("translated_text") 460 | if translated_text: 461 | if not args.quiet: 462 | console.print("\n[bold green]Translated Text:[/bold green]") 463 | console.print(Text(translated_text)) 464 | 465 | translated_text = result.get("translated_text") 466 | if args.target_lang and not translated_text and not args.quiet: 467 | console.print( 468 | "\n[yellow]Translation was requested but not found in the response.[/yellow]" 469 | ) 470 | 471 | if args.output_overlay_path and translated_text: 472 | if not args.quiet: 473 | console.print( 474 | f"\nImage with overlay saved to: [cyan]{args.output_overlay_path}[/cyan]" 475 | ) 476 | else: 477 | logging.info( 478 | f"Image with overlay saved to: {args.output_overlay_path}" 479 | ) 480 | 481 | if args.sharex: 482 | source_for_copy, text_to_copy = ("", "") 483 | # Prioritize translated text for copying 484 | if args.target_lang and translated_text: 485 | text_to_copy, source_for_copy = translated_text, "Translated text" 486 | elif args.output_blocks: 487 | blocks = result.get("text_blocks", []) 488 | if blocks: 489 | text_to_copy = "\n\n".join([b.get("text", "") for b in blocks]) 490 | source_for_copy = "OCR text (blocks)" 491 | else: 492 | ocr_text = result.get("ocr_text") 493 | if ocr_text: 494 | text_to_copy, source_for_copy = ocr_text, "OCR text" 495 | 496 | if text_to_copy: 497 | if copy_to_clipboard(text_to_copy): 498 | if not args.quiet: 499 | console.print( 500 | f"\n[bold magenta]({source_for_copy} copied to clipboard)[/bold magenta]" 501 | ) 502 | else: 503 | logging.info(f"{source_for_copy} copied to clipboard") 504 | else: 505 | # This is an error/warning, so it should probably stay visible 506 | console.print( 507 | "\n[bold red]Failed to copy text. Is 'pyperclip' installed? 
" 508 | '(`pip install "chrome-lens-py[clipboard]"`)[/bold red]' 509 | ) 510 | elif not args.quiet: 511 | console.print("\n[yellow]No text available to copy.[/yellow]") 512 | 513 | next_to_print += 1 514 | 515 | await job_queue.join() 516 | for task in worker_tasks: 517 | task.cancel() 518 | await asyncio.gather(*worker_tasks, return_exceptions=True) 519 | 520 | except LensException as e: 521 | console.print(f"\n[bold red]Lens API Error:[/bold red] {e}") 522 | sys.exit(1) 523 | 524 | 525 | def run(): 526 | if sys.platform == "win32" and sys.stdout.encoding != "utf-8": 527 | try: 528 | os.system("chcp 65001 > nul") 529 | logging.debug("Set Windows console to chcp 65001 (UTF-8)") 530 | except Exception as e: 531 | print(f"Warning: Failed to set console to UTF-8 (chcp 65001). Error: {e}") 532 | try: 533 | asyncio.run(cli_main()) 534 | except KeyboardInterrupt: 535 | console.print("\n[yellow]Operation cancelled by user.[/yellow]") 536 | 537 | 538 | if __name__ == "__main__": 539 | run() 540 | -------------------------------------------------------------------------------- /experiments/test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import sys 7 | import time 8 | from urllib.parse import parse_qs, urlparse 9 | 10 | import httpx 11 | 12 | try: 13 | import json5 14 | 15 | json_loader = json5.loads 16 | except ImportError: 17 | json_loader = json.loads 18 | 19 | logging.basicConfig( 20 | level=logging.INFO, 21 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 22 | datefmt="%H:%M:%S", 23 | ) 24 | main_log = logging.getLogger("main") 25 | scan_log = logging.getLogger("scan_image") 26 | http_log = logging.getLogger("http_client") 27 | parse_log = logging.getLogger("parser") 28 | cookie_log = logging.getLogger("cookies") 29 | io_log = logging.getLogger("image_io") 30 | 31 | 32 | LENS_UPLOAD_ENDPOINT = "https://lens.google.com/v3/upload" 33 | LENS_METADATA_ENDPOINT = "https://lens.google.com/qfmetadata" 34 | HEADERS = { 35 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 36 | "Accept-Language": "ru", 37 | "Cache-Control": "max-age=0", 38 | "Sec-Ch-Ua": '"Not-A.Brand";v="8", "Chromium";v="135", "Google Chrome";v="135"', 39 | "Sec-Ch-Ua-Mobile": "?0", 40 | "Sec-Ch-Ua-Platform": '"Windows"', 41 | "Upgrade-Insecure-Requests": "1", 42 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", 43 | "X-Client-Data": "CIW2yQEIorbJAQipncoBCIH+ygEIkqHLAQiKo8sBCPWYzQEIhaDNAQji0M4BCLPTzgEI19TOAQjy1c4BCJLYzgEIwNjOAQjM2M4BGM7VzgE=", 44 | "Origin": "https://www.google.com", 45 | "Referer": "https://www.google.com/", 46 | "Sec-Fetch-Site": "same-site", 47 | "Sec-Fetch-Mode": "navigate", 48 | "Sec-Fetch-Dest": "document", 49 | "Sec-Fetch-User": "?1", 50 | "Priority": "u=0, i", 51 | "Accept-Encoding": "gzip, deflate, br", 52 | "Connection": "keep-alive", 53 | } 54 | COOKIE_FILE = "cookies_lens_test.json" 55 | 56 | 57 | async def read_image_data(image_path): 58 | """Reads image data from file.""" 59 | io_log.debug(f"Attempting to read image: {image_path}") 60 | start_time = time.perf_counter() 61 | try: 62 | with open(image_path, "rb") as f: 63 | data = f.read() 64 | end_time = time.perf_counter() 65 | io_log.debug( 66 | f"Read {len(data)} bytes from {image_path} in {end_time - start_time:.4f} sec." 
67 | ) 68 | return data 69 | except FileNotFoundError: 70 | io_log.error(f"Image not found: {image_path}") 71 | return None 72 | except Exception as e: 73 | io_log.error(f"Error reading image {image_path}: {e}", exc_info=True) 74 | return None 75 | 76 | 77 | def extract_ids_from_url(url_string): 78 | """Extracts vsrid and lsessionid from URL.""" 79 | parse_log.debug(f"Attempting to extract IDs from URL: {url_string}") 80 | start_time = time.perf_counter() 81 | try: 82 | parsed_url = urlparse(url_string) 83 | query_params = parse_qs(parsed_url.query) 84 | vsrid = query_params.get("vsrid", [None])[0] 85 | lsessionid = query_params.get("lsessionid", [None])[0] 86 | end_time = time.perf_counter() 87 | if vsrid and lsessionid: 88 | parse_log.debug( 89 | f"Extracted vsrid='{vsrid}', lsessionid='{lsessionid}' in {end_time - start_time:.4f} sec." 90 | ) 91 | else: 92 | parse_log.warning( 93 | f"Could not extract vsrid or lsessionid from URL in {end_time - start_time:.4f} sec." 94 | ) 95 | return vsrid, lsessionid 96 | except Exception as e: 97 | parse_log.error( 98 | f"Error extracting IDs from URL {url_string}: {e}", exc_info=True 99 | ) 100 | return None, None 101 | 102 | 103 | async def save_cookies(cookies, cookie_file): 104 | """Saves cookies to JSON file.""" 105 | cookie_log.debug(f"Attempting to save cookies to {cookie_file}") 106 | start_time = time.perf_counter() 107 | try: 108 | cookies_dict = {} 109 | cookie_jar = getattr(cookies, "jar", cookies) 110 | if hasattr(cookie_jar, "items"): 111 | for name, value in cookie_jar.items(): 112 | cookie_obj = cookie_jar.get(name) 113 | if cookie_obj and hasattr(cookie_obj, "value"): 114 | cookies_dict[name] = cookie_obj.value 115 | elif isinstance(value, str): 116 | cookies_dict[name] = value 117 | elif hasattr(cookie_jar, "__iter__"): 118 | for cookie in cookie_jar: 119 | if hasattr(cookie, "name") and hasattr(cookie, "value"): 120 | cookies_dict[cookie.name] = cookie.value 121 | else: 122 | cookie_log.warning( 123 | f"Could not determine how to iterate cookies object: {type(cookies)}" 124 | ) 125 | return 126 | 127 | with open(cookie_file, "w") as f: 128 | json.dump(cookies_dict, f, indent=2) 129 | end_time = time.perf_counter() 130 | cookie_log.debug( 131 | f"Cookies saved ({len(cookies_dict)} items) to {cookie_file} in {end_time - start_time:.4f} sec." 132 | ) 133 | except Exception as e: 134 | cookie_log.error(f"Error saving cookies: {e}", exc_info=True) 135 | 136 | 137 | async def load_cookies(cookie_file): 138 | """Loads cookies from JSON file.""" 139 | cookie_log.debug(f"Attempting to load cookies from {cookie_file}") 140 | start_time = time.perf_counter() 141 | try: 142 | if os.path.exists(cookie_file): 143 | with open(cookie_file, "r") as f: 144 | cookies_dict = json.load(f) 145 | end_time = time.perf_counter() 146 | cookie_log.debug( 147 | f"Cookies loaded ({len(cookies_dict)} items) from {cookie_file} in {end_time - start_time:.4f} sec." 148 | ) 149 | return cookies_dict 150 | else: 151 | cookie_log.debug(f"Cookie file {cookie_file} not found.") 152 | return {} 153 | except (json.JSONDecodeError, FileNotFoundError) as e: 154 | cookie_log.warning( 155 | f"Error loading cookies from {cookie_file}: {e}. Ignoring cookies." 156 | ) 157 | except Exception as e: 158 | cookie_log.warning( 159 | f"Unexpected error loading cookies from {cookie_file}: {e}. Ignoring cookies." 
160 | ) 161 | return {} 162 | 163 | 164 | def adaptive_parse_text_and_language(metadata_json): 165 | """ 166 | Adaptively parses JSON to extract language, text blocks, and word annotations. 167 | """ 168 | parse_log.info("Starting adaptive parsing of metadata JSON.") 169 | start_time = time.perf_counter() 170 | language = None 171 | all_word_annotations = [] 172 | reconstructed_blocks = [] 173 | 174 | try: 175 | if not isinstance(metadata_json, list) or not metadata_json: 176 | parse_log.error( 177 | "Invalid JSON structure: metadata_json is not a non-empty list." 178 | ) 179 | return None, [], [] 180 | response_container = next( 181 | ( 182 | item 183 | for item in metadata_json 184 | if isinstance(item, list) 185 | and item 186 | and item[0] == "fetch_query_formulation_metadata_response" 187 | ), 188 | None, 189 | ) 190 | if response_container is None: 191 | parse_log.error( 192 | "Could not find 'fetch_query_formulation_metadata_response' container." 193 | ) 194 | return None, [], [] 195 | parse_log.debug("'fetch_query_formulation_metadata_response' container found.") 196 | 197 | lang_start_time = time.perf_counter() 198 | try: 199 | if len(response_container) > 2 and isinstance(response_container[2], list): 200 | lang_section = response_container[2] 201 | language = next( 202 | ( 203 | element 204 | for element in lang_section 205 | if isinstance(element, str) and len(element) == 2 206 | ), 207 | None, 208 | ) 209 | if language: 210 | parse_log.debug( 211 | f"Found potential language code: '{language}' in {time.perf_counter() - lang_start_time:.4f} sec." 212 | ) 213 | else: 214 | parse_log.debug( 215 | f"No direct 2-char language code found in section [2] in {time.perf_counter() - lang_start_time:.4f} sec." 216 | ) 217 | else: 218 | parse_log.debug( 219 | f"Language section [2] not found or not a list in {time.perf_counter() - lang_start_time:.4f} sec." 220 | ) 221 | 222 | except (IndexError, TypeError, StopIteration): 223 | parse_log.warning( 224 | "Could not find language code using primary method.", exc_info=True 225 | ) 226 | 227 | parse_log.debug("Searching for text segments list...") 228 | segments_iterable = None 229 | possible_paths_to_segments_list = [ 230 | lambda rc: rc[2][0][0][0], 231 | lambda rc: rc[1][0][0][0], 232 | lambda rc: rc[2][0][0], 233 | ] 234 | path_names = ["[2][0][0][0]", "[1][0][0][0]", "[2][0][0]"] 235 | path_search_start = time.perf_counter() 236 | 237 | for i, path_func in enumerate(possible_paths_to_segments_list): 238 | path_name = path_names[i] 239 | parse_log.debug(f"Trying path ending with {path_name}...") 240 | try: 241 | candidate_iterable = path_func(response_container) 242 | if ( 243 | isinstance(candidate_iterable, list) 244 | and candidate_iterable 245 | and isinstance(candidate_iterable[0], list) 246 | ): 247 | try: 248 | first_segment = candidate_iterable[0] 249 | if len(first_segment) > 1 and isinstance( 250 | first_segment[1], list 251 | ): 252 | if ( 253 | first_segment[1] 254 | and isinstance(first_segment[1][0], list) 255 | and len(first_segment[1][0]) > 0 256 | and isinstance(first_segment[1][0][0], list) 257 | ): 258 | segments_iterable = candidate_iterable 259 | parse_log.debug( 260 | f"Segments list identified at path ending with {path_name}." 
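# Segment shape assumed by the loop below (reverse-engineered, may drift):
#   segment_list[1] -> [word_group, ...]; word_group[0] -> [word_info, ...]
#   word_info -> [_, text: str, separator: str, [bbox, ...], ...]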
261 | ) 262 | break 263 | except (IndexError, TypeError) as e_check: 264 | parse_log.debug( 265 | f"Path {path_name} candidate structure check failed: {e_check}" 266 | ) 267 | pass 268 | except (IndexError, TypeError) as e_path: 269 | parse_log.debug(f"Path {path_name} access failed: {e_path}") 270 | pass 271 | 272 | parse_log.debug( 273 | f"Path search finished in {time.perf_counter() - path_search_start:.4f} sec." 274 | ) 275 | 276 | if segments_iterable is None: 277 | parse_log.error( 278 | f"Could not identify valid text segments list using known paths {path_names}. Full structure might have changed." 279 | ) 280 | return language, [], [] 281 | 282 | parse_log.info( 283 | f"Processing {len(segments_iterable)} potential text segments..." 284 | ) 285 | # segment_processing_start = time.perf_counter() 286 | 287 | for i, segment_list in enumerate(segments_iterable): 288 | segment_start_time = time.perf_counter() 289 | current_block_word_annotations = [] 290 | block_text_builder = io.StringIO() 291 | last_word_ends_with_space = False 292 | 293 | if not isinstance(segment_list, list): 294 | parse_log.warning( 295 | f"Skipping segment #{i}: Expected list, got {type(segment_list)}." 296 | ) 297 | continue 298 | 299 | try: 300 | if len(segment_list) > 1 and isinstance(segment_list[1], list): 301 | word_groups_list = segment_list[1] 302 | parse_log.debug( 303 | f"Segment #{i}: Found {len(word_groups_list)} word groups." 304 | ) 305 | 306 | for group_count, word_group in enumerate(word_groups_list, 1): 307 | try: 308 | if ( 309 | isinstance(word_group, list) 310 | and len(word_group) > 0 311 | and isinstance(word_group[0], list) 312 | ): 313 | 314 | word_list = word_group[0] 315 | parse_log.debug( 316 | f" Group {group_count}: Found {len(word_list)} words." 317 | ) 318 | 319 | if ( 320 | group_count > 1 321 | and block_text_builder.tell() > 0 322 | and not last_word_ends_with_space 323 | ): 324 | block_text_builder.write(" ") 325 | last_word_ends_with_space = True 326 | 327 | for word_idx, word_info in enumerate(word_list): 328 | try: 329 | if ( 330 | isinstance(word_info, list) 331 | and len(word_info) > 3 332 | and isinstance(word_info[1], str) 333 | and isinstance(word_info[2], str) 334 | and isinstance(word_info[3], list) 335 | and word_info[3] 336 | and isinstance(word_info[3][0], list) 337 | ): 338 | 339 | text = word_info[1] 340 | space_indicator = word_info[2] 341 | bbox = word_info[3][0] 342 | 343 | current_block_word_annotations.append( 344 | {"text": text, "bbox": bbox} 345 | ) 346 | 347 | block_text_builder.write(text) 348 | if space_indicator == " ": 349 | block_text_builder.write( 350 | space_indicator 351 | ) 352 | last_word_ends_with_space = True 353 | else: 354 | last_word_ends_with_space = False 355 | else: 356 | parse_log.warning( 357 | f"Segment #{i}, Group {group_count}, Word {word_idx}: Unexpected word_info structure or type: {word_info}" 358 | ) 359 | 360 | except (IndexError, TypeError) as e_word: 361 | parse_log.warning( 362 | f"Segment #{i}, Group {group_count}, Word {word_idx}: Error processing word_info: {e_word}. Data: {word_info}" 363 | ) 364 | pass 365 | else: 366 | parse_log.warning( 367 | f"Segment #{i}, Group {group_count}: Unexpected word_group structure: {word_group}" 368 | ) 369 | 370 | except (IndexError, TypeError) as e_group: 371 | parse_log.warning( 372 | f"Segment #{i}, Group {group_count}: Error processing word_group: {e_group}. 
Data: {word_group}" 373 | ) 374 | pass 375 | else: 376 | parse_log.warning( 377 | f"Segment #{i}: Word groups list structure segment_list[1] not found or invalid. Segment data: {segment_list}" 378 | ) 379 | except (IndexError, TypeError) as e_segment: 380 | parse_log.error( 381 | f"Segment #{i}: Error processing segment structure: {e_segment}. Data: {segment_list}", 382 | exc_info=True, 383 | ) 384 | except Exception as e_segment_unexpected: 385 | parse_log.error( 386 | f"Segment #{i}: Unexpected error processing segment: {e_segment_unexpected}", 387 | exc_info=True, 388 | ) 389 | 390 | reconstructed_text = ( 391 | block_text_builder.getvalue().rstrip(" ") 392 | if not last_word_ends_with_space 393 | else block_text_builder.getvalue() 394 | ) 395 | block_text_builder.close() 396 | 397 | segment_end_time = time.perf_counter() 398 | parse_log.debug( 399 | f"Segment #{i} processed in {segment_end_time - segment_start_time:.4f} sec. Text length: {len(reconstructed_text)}, Annotations: {len(current_block_word_annotations)}" 400 | ) 401 | 402 | if reconstructed_text or current_block_word_annotations: 403 | reconstructed_blocks.append(reconstructed_text) 404 | all_word_annotations.extend(current_block_word_annotations) 405 | else: 406 | parse_log.debug(f"Segment #{i} resulted in no text or annotations.") 407 | 408 | except Exception as e: 409 | parse_log.error( 410 | f"Critical error during adaptive text extraction: {e}", exc_info=True 411 | ) 412 | return language, reconstructed_blocks, all_word_annotations 413 | 414 | total_parse_time = time.perf_counter() - start_time 415 | parse_log.info( 416 | f"Adaptive parsing finished in {total_parse_time:.4f} sec. Language: '{language}'. Text blocks: {len(reconstructed_blocks)}. Word annotations: {len(all_word_annotations)}." 417 | ) 418 | return language, reconstructed_blocks, all_word_annotations 419 | 420 | 421 | async def scan_image(image_path): 422 | """Scans image via Google Lens, extracts text, language, and coordinates.""" 423 | scan_log.info(f"Starting image scan process for: {image_path}") 424 | total_scan_start_time = time.perf_counter() 425 | 426 | read_start = time.perf_counter() 427 | image_data = await read_image_data(image_path) 428 | read_end = time.perf_counter() 429 | scan_log.info(f"Image read finished in {read_end - read_start:.4f} sec.") 430 | if not image_data: 431 | return None, "Failed to read image data" 432 | 433 | filename = os.path.basename(image_path) 434 | _, ext = os.path.splitext(filename.lower()) 435 | content_type = "image/jpeg" 436 | if ext == ".png": 437 | content_type = "image/png" 438 | elif ext == ".webp": 439 | content_type = "image/webp" 440 | elif ext == ".gif": 441 | content_type = "image/gif" 442 | scan_log.debug(f"Determined filename: '{filename}', content type: {content_type}") 443 | 444 | files = {"encoded_image": (filename, image_data, content_type)} 445 | params_upload = { 446 | "hl": "ru", 447 | "re": "av", 448 | "vpw": "1903", 449 | "vph": "953", 450 | "ep": "gsbubb", 451 | "st": str(int(time.time() * 1000)), 452 | } 453 | 454 | cookie_load_start = time.perf_counter() 455 | loaded_cookies = await load_cookies(COOKIE_FILE) 456 | cookie_load_end = time.perf_counter() 457 | scan_log.info( 458 | f"Cookie loading finished in {cookie_load_end - cookie_load_start:.4f} sec." 
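# Request flow (reverse-engineered): POST the image to /v3/upload, follow
# redirects, read vsrid/lsessionid from the final URL, then GET /qfmetadata
# with those IDs to fetch the recognition metadata.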
459 | ) 460 | 461 | limits = httpx.Limits(max_keepalive_connections=5, max_connections=10) 462 | timeout = httpx.Timeout(60.0, connect=15.0) 463 | http_log.debug(f"Configuring httpx client: timeout={timeout}, limits={limits}") 464 | 465 | async with httpx.AsyncClient( 466 | cookies=loaded_cookies, 467 | follow_redirects=True, 468 | timeout=timeout, 469 | limits=limits, 470 | http2=True, 471 | verify=True, 472 | ) as client: 473 | try: 474 | http_log.info(f"POST request initiated to {LENS_UPLOAD_ENDPOINT}") 475 | upload_start_time = time.perf_counter() 476 | response_upload = await client.post( 477 | LENS_UPLOAD_ENDPOINT, headers=HEADERS, files=files, params=params_upload 478 | ) 479 | upload_end_time = time.perf_counter() 480 | http_log.info( 481 | f"POST request to {LENS_UPLOAD_ENDPOINT} finished in {upload_end_time - upload_start_time:.4f} sec. " 482 | f"Status: {response_upload.status_code}. Final URL: {response_upload.url}" 483 | ) 484 | 485 | cookie_save_start = time.perf_counter() 486 | await save_cookies(client.cookies, COOKIE_FILE) 487 | cookie_save_end = time.perf_counter() 488 | http_log.debug( 489 | f"Cookies saved after upload in {cookie_save_end - cookie_save_start:.4f} sec." 490 | ) 491 | 492 | response_upload.raise_for_status() 493 | 494 | final_url = str(response_upload.url) 495 | 496 | extract_start = time.perf_counter() 497 | vsrid, lsessionid = extract_ids_from_url(final_url) 498 | extract_end = time.perf_counter() 499 | scan_log.info( 500 | f"ID extraction finished in {extract_end - extract_start:.4f} sec." 501 | ) 502 | if not vsrid or not lsessionid: 503 | scan_log.error( 504 | "Failed to extract vsrid or lsessionid from upload redirect URL." 505 | ) 506 | return None, f"Failed to get session IDs from URL: {final_url}" 507 | 508 | scan_log.info("Waiting for 1 second before metadata request...") 509 | await asyncio.sleep(1) 510 | scan_log.info("Wait finished. Proceeding with metadata request.") 511 | 512 | metadata_params = { 513 | "vsrid": vsrid, 514 | "lsessionid": lsessionid, 515 | } 516 | metadata_headers = HEADERS.copy() 517 | metadata_headers.update( 518 | { 519 | "Accept": "*/*", 520 | "Referer": final_url, 521 | "Sec-Fetch-Site": "same-origin", 522 | "Sec-Fetch-Mode": "cors", 523 | "Sec-Fetch-Dest": "empty", 524 | "Priority": "u=1, i", 525 | } 526 | ) 527 | metadata_headers.pop("Upgrade-Insecure-Requests", None) 528 | metadata_headers.pop("Sec-Fetch-User", None) 529 | metadata_headers.pop("Cache-Control", None) 530 | metadata_headers.pop("Origin", None) 531 | 532 | metadata_url_obj = httpx.URL(LENS_METADATA_ENDPOINT, params=metadata_params) 533 | metadata_url_str = str(metadata_url_obj) 534 | http_log.info( 535 | f"GET request initiated to {LENS_METADATA_ENDPOINT} (URL: {metadata_url_str})" 536 | ) 537 | http_log.debug(f"Metadata request headers: {metadata_headers}") 538 | metadata_start_time = time.perf_counter() 539 | 540 | response_metadata = await client.get( 541 | metadata_url_obj, headers=metadata_headers 542 | ) 543 | metadata_end_time = time.perf_counter() 544 | http_log.info( 545 | f"GET request to {LENS_METADATA_ENDPOINT} finished in {metadata_end_time - metadata_start_time:.4f} sec. " 546 | f"Status: {response_metadata.status_code}" 547 | ) 548 | 549 | cookie_save_start = time.perf_counter() 550 | await save_cookies(client.cookies, COOKIE_FILE) 551 | cookie_save_end = time.perf_counter() 552 | http_log.debug( 553 | f"Cookies saved after metadata fetch in {cookie_save_end - cookie_save_start:.4f} sec." 
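# The metadata body starts with Google's anti-XSSI guard, e.g.
#   ")]}'\n[["fetch_query_formulation_metadata_response", ...]]"
# which is stripped below before JSON parsing.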
554 | ) 555 | 556 | response_metadata.raise_for_status() 557 | 558 | parse_log.info("Starting metadata response processing.") 559 | process_start_time = time.perf_counter() 560 | 561 | response_text = response_metadata.text 562 | original_len = len(response_text) 563 | if response_text.startswith(")]}'\n"): 564 | response_text = response_text[5:] 565 | parse_log.debug("Removed ')]}'\\n prefix") 566 | elif response_text.startswith(")]}'"): 567 | response_text = response_text[4:] 568 | parse_log.debug("Removed ')]}' prefix") 569 | stripped_len = len(response_text) 570 | parse_log.debug(f"Response text length: {original_len} -> {stripped_len}") 571 | 572 | try: 573 | json_parse_start = time.perf_counter() 574 | metadata_json = json_loader(response_text) 575 | json_parse_end = time.perf_counter() 576 | parse_log.info( 577 | f"JSON parsing finished in {json_parse_end - json_parse_start:.4f} sec." 578 | ) 579 | 580 | # extract_start_time = time.perf_counter() 581 | language, reconstructed_blocks, all_word_annotations = ( 582 | adaptive_parse_text_and_language(metadata_json) 583 | ) 584 | # extract_end_time = time.perf_counter() 585 | 586 | full_text = "\n".join(reconstructed_blocks) 587 | 588 | result_data = { 589 | "text": full_text, 590 | "language": language if language else "und", 591 | "text_with_coordinates": json.dumps( 592 | all_word_annotations, ensure_ascii=False, indent=None 593 | ), 594 | } 595 | process_end_time = time.perf_counter() 596 | parse_log.info( 597 | f"Total metadata processing (strip + JSON parse + adaptive extract) finished in {process_end_time - process_start_time:.4f} sec." 598 | ) 599 | 600 | total_scan_end_time = time.perf_counter() 601 | scan_log.info( 602 | f"Image scan process completed successfully in {total_scan_end_time - total_scan_start_time:.4f} sec." 603 | ) 604 | return result_data, metadata_json 605 | 606 | except Exception as e_parse: 607 | parse_log.error( 608 | f"Error parsing JSON or extracting text: {e_parse}", exc_info=True 609 | ) 610 | log_snippet = ( 611 | response_text[:500] + "..." 612 | if len(response_text) > 500 613 | else response_text 614 | ) 615 | parse_log.error(f"Problematic text snippet (start): {log_snippet}") 616 | total_scan_end_time = time.perf_counter() 617 | scan_log.error( 618 | f"Image scan process failed during parsing/extraction after {total_scan_end_time - total_scan_start_time:.4f} sec." 619 | ) 620 | return None, response_metadata.text 621 | 622 | except httpx.HTTPStatusError as e: 623 | http_log.error( 624 | f"HTTP error: {e.response.status_code} for URL {e.request.url}", 625 | exc_info=True, 626 | ) 627 | try: 628 | body_snippet = ( 629 | e.response.text[:500] + "..." 630 | if len(e.response.text) > 500 631 | else e.response.text 632 | ) 633 | http_log.error(f"Response body snippet: {body_snippet}") 634 | except Exception: 635 | http_log.error("Could not read response body.") 636 | total_scan_end_time = time.perf_counter() 637 | scan_log.error( 638 | f"Image scan process failed due to HTTP error after {total_scan_end_time - total_scan_start_time:.4f} sec." 639 | ) 640 | return None, f"HTTP Error {e.response.status_code}: {e.request.url}" 641 | except httpx.RequestError as e: 642 | http_log.error(f"Request error for URL {e.request.url}: {e}", exc_info=True) 643 | total_scan_end_time = time.perf_counter() 644 | scan_log.error( 645 | f"Image scan process failed due to request error after {total_scan_end_time - total_scan_start_time:.4f} sec." 
646 | ) 647 | return None, f"Request Error: {e}" 648 | except Exception as e: 649 | scan_log.error(f"Unexpected error in scan_image: {e}", exc_info=True) 650 | total_scan_end_time = time.perf_counter() 651 | scan_log.error( 652 | f"Image scan process failed unexpectedly after {total_scan_end_time - total_scan_start_time:.4f} sec." 653 | ) 654 | return None, f"Unexpected Error: {e}" 655 | 656 | 657 | async def main(): 658 | if len(sys.argv) < 2: 659 | print(f"Usage: python {sys.argv[0]} <image_path>") 660 | sys.exit(1) 661 | 662 | image_path = sys.argv[1] 663 | if not os.path.isfile(image_path): 664 | main_log.error(f"Error: File not found: {image_path}") 665 | sys.exit(1) 666 | 667 | main_log.info("========================================") 668 | main_log.info(f"Starting Google Lens scan for: {image_path}") 669 | main_log.info( 670 | f"Using log level: {logging.getLevelName(logging.getLogger().getEffectiveLevel())}" 671 | ) 672 | main_log.info("========================================") 673 | start_total_time = time.perf_counter() 674 | 675 | result_dict, raw_data_or_error = await scan_image(image_path) 676 | 677 | end_total_time = time.perf_counter() 678 | main_log.info( 679 | f"--- Total execution time for scan_image call: {end_total_time - start_total_time:.4f} sec. ---" 680 | ) 681 | 682 | if result_dict: 683 | print("\n--- Google Lens Scan Result ---") 684 | try: 685 | print(f"Language: {result_dict.get('language', 'N/A')}") 686 | print("\nText:") 687 | print(result_dict.get("text", "N/A")) 688 | print("\nText with Coordinates (JSON String):") 689 | coords_json_str = result_dict.get("text_with_coordinates", "[]") 690 | try: 691 | coords_data = json.loads(coords_json_str) 692 | print(json.dumps(coords_data, indent=2, ensure_ascii=False)) 693 | except json.JSONDecodeError: 694 | print(coords_json_str) 695 | print("------------------------------") 696 | main_log.info("Scan successful. Results printed.") 697 | except Exception as e: 698 | main_log.error(f"Error printing results: {e}") 699 | print("\n--- Raw Result Dictionary ---") 700 | print(result_dict) 701 | else: 702 | print("\nGoogle Lens scan failed.") 703 | main_log.error( 704 | f"Scan failed. See previous logs for details. Error context/data: {raw_data_or_error}" 705 | ) 706 | 707 | 708 | if __name__ == "__main__": 709 | if sys.platform == "win32" and sys.version_info >= (3, 8): 710 | asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) 711 | elif sys.platform == "win32": 712 | pass 713 | 714 | asyncio.run(main()) 715 | -------------------------------------------------------------------------------- /src/chrome_lens_py/utils/lens_betterproto.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
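# Illustrative use of the generated messages below (an added example, not
# compiler output; assumes only the betterproto runtime):
#   ctx = LensOverlayClientContext(
#       platform=Platform.WEB,
#       surface=Surface.CHROMIUM,
#       locale_context=LocaleContext(language="en", region="US", time_zone="UTC"),
#   )
#   payload = bytes(ctx)  # betterproto messages serialize via bytes()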
2 | # sources: lens_overlay_client_context.proto, lens_overlay_client_logs.proto, lens_overlay_client_platform.proto, lens_overlay_cluster_info.proto, lens_overlay_deep_gleam_data.proto, lens_overlay_document.proto, lens_overlay_filters.proto, lens_overlay_geometry.proto, lens_overlay_image_crop.proto, lens_overlay_image_data.proto, lens_overlay_interaction_request_metadata.proto, lens_overlay_knowledge_intent_query.proto, lens_overlay_knowledge_query.proto, lens_overlay_math_solver_query.proto, lens_overlay_message_set.proto, lens_overlay_overlay_object.proto, lens_overlay_payload.proto, lens_overlay_phase_latencies_metadata.proto, lens_overlay_platform.proto, lens_overlay_polygon.proto, lens_overlay_request_id.proto, lens_overlay_routing_info.proto, lens_overlay_selection_type.proto, lens_overlay_server.proto, lens_overlay_service_deps.proto, lens_overlay_stickiness_signals.proto, lens_overlay_surface.proto, lens_overlay_text.proto, lens_overlay_text_query.proto, lens_overlay_translate_stickiness_signals.proto, lens_overlay_video_context_input_params.proto, lens_overlay_video_params.proto, lens_overlay_visual_search_interaction_data.proto, lens_overlay_visual_search_interaction_log_data.proto 3 | # plugin: python-betterproto 4 | # This file has been @generated 5 | 6 | from dataclasses import dataclass 7 | from typing import List, Optional 8 | 9 | import betterproto 10 | 11 | 12 | class LensOverlayFilterType(betterproto.Enum): 13 | """Supported filter types.""" 14 | 15 | UNKNOWN_FILTER_TYPE = 0 16 | TRANSLATE = 2 17 | AUTO_FILTER = 7 18 | 19 | 20 | class Platform(betterproto.Enum): 21 | UNSPECIFIED = 0 22 | WEB = 3 23 | 24 | 25 | class Surface(betterproto.Enum): 26 | UNSPECIFIED = 0 27 | CHROMIUM = 4 28 | 29 | 30 | class LensRenderingEnvironment(betterproto.Enum): 31 | """The possible rendering environments.""" 32 | 33 | RENDERING_ENV_UNSPECIFIED = 0 34 | RENDERING_ENV_LENS_OVERLAY = 14 35 | 36 | 37 | class LensOverlayPhaseLatenciesMetadataImageType(betterproto.Enum): 38 | UNKNOWN = 0 39 | JPEG = 1 40 | PNG = 2 41 | WEBP = 3 42 | 43 | 44 | class LensOverlayClientLogsLensOverlayEntryPoint(betterproto.Enum): 45 | UNKNOWN_ENTRY_POINT = 0 46 | APP_MENU = 1 47 | PAGE_CONTEXT_MENU = 2 48 | IMAGE_CONTEXT_MENU = 3 49 | OMNIBOX_BUTTON = 4 50 | TOOLBAR_BUTTON = 5 51 | FIND_IN_PAGE = 6 52 | 53 | 54 | class ClientPlatform(betterproto.Enum): 55 | UNSPECIFIED = 0 56 | LENS_OVERLAY = 2 57 | 58 | 59 | class CoordinateType(betterproto.Enum): 60 | """Specifies the coordinate system used for geometry protos.""" 61 | 62 | UNSPECIFIED = 0 63 | """Unspecified default value, per proto best practice.""" 64 | 65 | NORMALIZED = 1 66 | """Normalized coordinates.""" 67 | 68 | IMAGE = 2 69 | """Image pixel coordinates.""" 70 | 71 | 72 | class PolygonVertexOrdering(betterproto.Enum): 73 | """Specifies the vertex ordering.""" 74 | 75 | VERTEX_ORDERING_UNSPECIFIED = 0 76 | CLOCKWISE = 1 77 | COUNTER_CLOCKWISE = 2 78 | 79 | 80 | class WritingDirection(betterproto.Enum): 81 | """The text reading order.""" 82 | 83 | LEFT_TO_RIGHT = 0 84 | RIGHT_TO_LEFT = 1 85 | TOP_TO_BOTTOM = 2 86 | 87 | 88 | class Alignment(betterproto.Enum): 89 | """The text alignment.""" 90 | 91 | DEFAULT_LEFT_ALIGNED = 0 92 | RIGHT_ALIGNED = 1 93 | CENTER_ALIGNED = 2 94 | 95 | 96 | class TextLayoutWordType(betterproto.Enum): 97 | TEXT = 0 98 | """Printed text.""" 99 | 100 | FORMULA = 1 101 | """Formula type, including mathematical or chemical formulas.""" 102 | 103 | 104 | class TranslationDataStatusCode(betterproto.Enum): 105 | UNKNOWN = 0 106 | 
SUCCESS = 1 107 | SERVER_ERROR = 2 108 | UNSUPPORTED_LANGUAGE_PAIR = 3 109 | SAME_LANGUAGE = 4 110 | UNKNOWN_SOURCE_LANGUAGE = 5 111 | INVALID_REQUEST = 6 112 | DEADLINE_EXCEEDED = 7 113 | EMPTY_TRANSLATION = 8 114 | NO_OP_TRANSLATION = 9 115 | 116 | 117 | class TranslationDataBackgroundImageDataFileFormat(betterproto.Enum): 118 | """File format of the bytes in background_image.""" 119 | 120 | UNKNOWN = 0 121 | RAW_BYTES_RGBA = 1 122 | PNG_RGBA = 2 123 | WEBP_RGBA = 3 124 | JPEG_RGB_PNG_MASK = 4 125 | 126 | 127 | class LensOverlayInteractionRequestMetadataType(betterproto.Enum): 128 | UNKNOWN = 0 129 | TAP = 1 130 | """User's tap on the screen.""" 131 | 132 | REGION = 2 133 | """User's region selection on the screenshot.""" 134 | 135 | TEXT_SELECTION = 3 136 | """User's text selection on the screenshot.""" 137 | 138 | REGION_SEARCH = 4 139 | """User selected a bounding box to region search.""" 140 | 141 | OBJECT_FULFILLMENT = 5 142 | """Requests selection and fulfillment of a specific object.""" 143 | 144 | CONTEXTUAL_SEARCH_QUERY = 9 145 | """User sent a query in the contextual search box.""" 146 | 147 | PDF_QUERY = 10 148 | """User sent a query about a pdf.""" 149 | 150 | WEBPAGE_QUERY = 11 151 | """User sent a query about a website.""" 152 | 153 | 154 | class OverlayObjectRenderingMetadataRenderType(betterproto.Enum): 155 | DEFAULT = 0 156 | GLEAM = 1 157 | 158 | 159 | class LensOverlaySelectionType(betterproto.Enum): 160 | """Possible selection types for Lens overlay.""" 161 | 162 | UNKNOWN_SELECTION_TYPE = 0 163 | TAP_ON_EMPTY = 1 164 | SELECT_TEXT_HIGHLIGHT = 3 165 | REGION_SEARCH = 7 166 | INJECTED_IMAGE = 10 167 | TAP_ON_REGION_GLEAM = 15 168 | MULTIMODAL_SEARCH = 18 169 | SELECT_TRANSLATED_TEXT = 21 170 | TAP_ON_OBJECT = 22 171 | MULTIMODAL_SUGGEST_TYPEAHEAD = 25 172 | MULTIMODAL_SUGGEST_ZERO_PREFIX = 26 173 | TRANSLATE_CHIP = 52 174 | SYMBOLIC_MATH_OBJECT = 53 175 | 176 | 177 | class PayloadRequestType(betterproto.Enum): 178 | """The type of the request the payload is sent in.""" 179 | 180 | REQUEST_TYPE_DEFAULT = 0 181 | """Unset Request type.""" 182 | 183 | REQUEST_TYPE_PDF = 1 184 | """Request is for PDF.""" 185 | 186 | REQUEST_TYPE_EARLY_PARTIAL_PDF = 3 187 | """Request is for partial PDF upload.""" 188 | 189 | REQUEST_TYPE_WEBPAGE = 2 190 | """Request is for webpage.""" 191 | 192 | 193 | class PayloadCompressionType(betterproto.Enum): 194 | """Possible compression types for content_data.""" 195 | 196 | UNCOMPRESSED = 0 197 | """Default value. 
File is not compressed.""" 198 | 199 | ZSTD = 1 200 | """ZSTD compression.""" 201 | 202 | 203 | class LensOverlayServerErrorErrorType(betterproto.Enum): 204 | UNKNOWN_TYPE = 0 205 | MISSING_REQUEST = 1 206 | 207 | 208 | class StickinessSignalsNamespace(betterproto.Enum): 209 | UNKNOWN = 0 210 | TRANSLATE_LITE = 56 211 | EDUCATION_INPUT = 79 212 | 213 | 214 | @dataclass(eq=False, repr=False) 215 | class AppliedFilter(betterproto.Message): 216 | """Supported filter types.""" 217 | 218 | filter_type: "LensOverlayFilterType" = betterproto.enum_field(1) 219 | translate: "AppliedFilterTranslate" = betterproto.message_field( 220 | 3, group="filter_payload" 221 | ) 222 | 223 | 224 | @dataclass(eq=False, repr=False) 225 | class AppliedFilterTranslate(betterproto.Message): 226 | target_language: str = betterproto.string_field(1) 227 | source_language: str = betterproto.string_field(2) 228 | 229 | 230 | @dataclass(eq=False, repr=False) 231 | class AppliedFilters(betterproto.Message): 232 | """Supported filter types.""" 233 | 234 | filter: List["AppliedFilter"] = betterproto.message_field(1) 235 | 236 | 237 | @dataclass(eq=False, repr=False) 238 | class LensOverlayClientContext(betterproto.Message): 239 | """Context information of the client sending the request.""" 240 | 241 | platform: "Platform" = betterproto.enum_field(1) 242 | """Required. Client platform.""" 243 | 244 | surface: "Surface" = betterproto.enum_field(2) 245 | """Optional. Client surface.""" 246 | 247 | locale_context: "LocaleContext" = betterproto.message_field(4) 248 | """Required. Locale specific context.""" 249 | 250 | app_id: str = betterproto.string_field(6) 251 | """ 252 | Required. Name of the package which sends the request to Lens Frontend. 253 | """ 254 | 255 | client_filters: "AppliedFilters" = betterproto.message_field(17) 256 | """Filters that are enabled on the client side.""" 257 | 258 | rendering_context: "RenderingContext" = betterproto.message_field(20) 259 | """The rendering context info.""" 260 | 261 | client_logging_data: "ClientLoggingData" = betterproto.message_field(23) 262 | """Logging data.""" 263 | 264 | 265 | @dataclass(eq=False, repr=False) 266 | class LocaleContext(betterproto.Message): 267 | """Describes locale context.""" 268 | 269 | language: str = betterproto.string_field(1) 270 | """The BCP 47 language tag used to identify the language of the client.""" 271 | 272 | region: str = betterproto.string_field(2) 273 | """The CLDR region tag used to identify the region of the client.""" 274 | 275 | time_zone: str = betterproto.string_field(3) 276 | """The CLDR time zone ID used to identify the timezone of the client.""" 277 | 278 | 279 | @dataclass(eq=False, repr=False) 280 | class RenderingContext(betterproto.Message): 281 | rendering_environment: "LensRenderingEnvironment" = betterproto.enum_field(2) 282 | """The rendering environment.""" 283 | 284 | 285 | @dataclass(eq=False, repr=False) 286 | class ClientLoggingData(betterproto.Message): 287 | """Contains data that can be used for logging purposes.""" 288 | 289 | is_history_eligible: bool = betterproto.bool_field(1) 290 | """Whether history is enabled.""" 291 | 292 | 293 | @dataclass(eq=False, repr=False) 294 | class LensOverlayPhaseLatenciesMetadata(betterproto.Message): 295 | """Phase latency metadata for the Lens Overlay.""" 296 | 297 | phase: List["LensOverlayPhaseLatenciesMetadataPhase"] = betterproto.message_field(1) 298 | 299 | 300 | @dataclass(eq=False, repr=False) 301 | class LensOverlayPhaseLatenciesMetadataPhase(betterproto.Message): 302 | 
""" 303 | Represents a single point in time during the image preprocessing flow. 304 | """ 305 | 306 | image_downscale_data: "LensOverlayPhaseLatenciesMetadataPhaseImageDownscaleData" = ( 307 | betterproto.message_field(3, group="phase_data") 308 | ) 309 | """Data specifically only relevant for IMAGE_DOWNSCALE_END PhaseType.""" 310 | 311 | image_encode_data: "LensOverlayPhaseLatenciesMetadataPhaseImageEncodeData" = ( 312 | betterproto.message_field(4, group="phase_data") 313 | ) 314 | """Data specifically only relevant for IMAGE_ENCODE_END PhaseType.""" 315 | 316 | 317 | @dataclass(eq=False, repr=False) 318 | class LensOverlayPhaseLatenciesMetadataPhaseImageDownscaleData(betterproto.Message): 319 | original_image_size: int = betterproto.int64_field(1) 320 | """The size of the original image, in pixels.""" 321 | 322 | downscaled_image_size: int = betterproto.int64_field(2) 323 | """The size of the downscaled image, in pixels.""" 324 | 325 | 326 | @dataclass(eq=False, repr=False) 327 | class LensOverlayPhaseLatenciesMetadataPhaseImageEncodeData(betterproto.Message): 328 | original_image_type: "LensOverlayPhaseLatenciesMetadataImageType" = ( 329 | betterproto.enum_field(1) 330 | ) 331 | """ 332 | The type of the original Image. This only applies to IMAGE_ENCODE_END 333 | PhaseTypes 334 | """ 335 | 336 | encoded_image_size_bytes: int = betterproto.int64_field(2) 337 | """The bytes size of the encoded image.""" 338 | 339 | 340 | @dataclass(eq=False, repr=False) 341 | class LensOverlayClientLogs(betterproto.Message): 342 | phase_latencies_metadata: "LensOverlayPhaseLatenciesMetadata" = ( 343 | betterproto.message_field(1) 344 | ) 345 | """ 346 | The phase latency metadata for any image preprocessing required for the 347 | request. 348 | """ 349 | 350 | lens_overlay_entry_point: "LensOverlayClientLogsLensOverlayEntryPoint" = ( 351 | betterproto.enum_field(2) 352 | ) 353 | """The Lens Overlay entry point used to access lens.""" 354 | 355 | paella_id: int = betterproto.uint64_field(3) 356 | """ 357 | A unique identifier for associating events logged by lens asynchronously. 
358 | """ 359 | 360 | metrics_collection_disabled: bool = betterproto.bool_field(5) 361 | """Whether the user has disabled metrics collection.""" 362 | 363 | 364 | @dataclass(eq=False, repr=False) 365 | class LensOverlayRoutingInfo(betterproto.Message): 366 | """Information about where to route the request.""" 367 | 368 | server_address: str = betterproto.string_field(1) 369 | """Address to route the request to.""" 370 | 371 | cell_address: str = betterproto.string_field(3) 372 | """Cell to route the request to.""" 373 | 374 | blade_target: str = betterproto.string_field(2) 375 | """Blade target to route the request to.""" 376 | 377 | 378 | @dataclass(eq=False, repr=False) 379 | class LensOverlayClusterInfo(betterproto.Message): 380 | """The cluster info for a Lens Overlay session.""" 381 | 382 | server_session_id: str = betterproto.string_field(1) 383 | """ID for subsequent server requests.""" 384 | 385 | search_session_id: str = betterproto.string_field(2) 386 | """ID for subsequent search requests.""" 387 | 388 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(6) 389 | """Info used for routing subsequent requests.""" 390 | 391 | 392 | @dataclass(eq=False, repr=False) 393 | class Polygon(betterproto.Message): 394 | """Information about a polygon.""" 395 | 396 | vertex: List["PolygonVertex"] = betterproto.message_field(1) 397 | vertex_ordering: "PolygonVertexOrdering" = betterproto.enum_field(2) 398 | coordinate_type: "CoordinateType" = betterproto.enum_field(3) 399 | """Specifies the coordinate type of vertices.""" 400 | 401 | 402 | @dataclass(eq=False, repr=False) 403 | class PolygonVertex(betterproto.Message): 404 | """Represents a single vertex in the polygon.""" 405 | 406 | x: float = betterproto.float_field(1) 407 | y: float = betterproto.float_field(2) 408 | 409 | 410 | @dataclass(eq=False, repr=False) 411 | class CenterRotatedBox(betterproto.Message): 412 | """Information about a center bounding box rotated around its center.""" 413 | 414 | center_x: float = betterproto.float_field(1) 415 | center_y: float = betterproto.float_field(2) 416 | width: float = betterproto.float_field(3) 417 | height: float = betterproto.float_field(4) 418 | rotation_z: float = betterproto.float_field(5) 419 | """ 420 | Clockwise rotation around the center in radians. The rotation angle is 421 | computed before normalizing the coordinates. 422 | """ 423 | 424 | coordinate_type: "CoordinateType" = betterproto.enum_field(6) 425 | """ 426 | Specifies the coordinate type of center and size. 427 | @note default is COORDINATE_TYPE_UNSPECIFIED, please initialize this value 428 | to NORMALIZED or IMAGE for Lens detection API usage. 429 | """ 430 | 431 | 432 | @dataclass(eq=False, repr=False) 433 | class Geometry(betterproto.Message): 434 | """Geometric shape(s) used for tracking and detection.""" 435 | 436 | bounding_box: "CenterRotatedBox" = betterproto.message_field(1) 437 | """Specifies the bounding box for this geometry.""" 438 | 439 | segmentation_polygon: List["Polygon"] = betterproto.message_field(5) 440 | """ 441 | Specifies the segmentation polygon. The vertices of the outer-boundaries 442 | are in clockwise, and the ones of inner-boundaries are in counter-clockwise 443 | ordering. 444 | """ 445 | 446 | 447 | @dataclass(eq=False, repr=False) 448 | class ZoomedCrop(betterproto.Message): 449 | """ 450 | A cropped and potentially re-scaled image region, rectangular subregion of a 451 | canonical image. 
452 | """ 453 | 454 | crop: "CenterRotatedBox" = betterproto.message_field(1) 455 | """The cropped region of the parent image in parent coordinates.""" 456 | 457 | parent_width: int = betterproto.int32_field(2) 458 | """Width of the parent image.""" 459 | 460 | parent_height: int = betterproto.int32_field(3) 461 | """Height of the parent image.""" 462 | 463 | zoom: float = betterproto.float_field(4) 464 | """ 465 | The ratio of the pixel dimensions of the child image to the pixel 466 | dimensions of the 'crop' in parent coordinates. 467 | """ 468 | 469 | 470 | @dataclass(eq=False, repr=False) 471 | class Text(betterproto.Message): 472 | text_layout: "TextLayout" = betterproto.message_field(1) 473 | """Optional. Information describing the text.""" 474 | 475 | content_language: str = betterproto.string_field(2) 476 | """ 477 | Optional. Dominant content language of the text. Language 478 | code is CLDR/BCP-47. 479 | """ 480 | 481 | 482 | @dataclass(eq=False, repr=False) 483 | class TextLayout(betterproto.Message): 484 | """Nested text structure.""" 485 | 486 | paragraphs: List["TextLayoutParagraph"] = betterproto.message_field(1) 487 | """Optional. List of paragraphs in natural reading order.""" 488 | 489 | 490 | @dataclass(eq=False, repr=False) 491 | class TextLayoutWord(betterproto.Message): 492 | id: "TextEntityIdentifier" = betterproto.message_field(1) 493 | """Required. Unique id within TextLayout.""" 494 | 495 | plain_text: str = betterproto.string_field(2) 496 | """Optional. The text in a plain text.""" 497 | 498 | text_separator: Optional[str] = betterproto.string_field(3, optional=True) 499 | """ 500 | Optional. The text separator that should be appended after this word when 501 | it is concatenated with the subsequent word in the same or next 502 | line/paragraph into a single-line string. This is specified as optional 503 | because there is a distinction between the absence of a separator and 504 | the empty string as a separator. 505 | """ 506 | 507 | geometry: "Geometry" = betterproto.message_field(4) 508 | """Optional. The geometry of the word.""" 509 | 510 | type: "TextLayoutWordType" = betterproto.enum_field(5) 511 | """Optional. The type of this word.""" 512 | 513 | formula_metadata: "TextLayoutWordFormulaMetadata" = betterproto.message_field(6) 514 | """ 515 | Optional. Metadata for formulas. This is populated for entities of 516 | `type=FORMULA`. 517 | """ 518 | 519 | 520 | @dataclass(eq=False, repr=False) 521 | class TextLayoutWordFormulaMetadata(betterproto.Message): 522 | latex: str = betterproto.string_field(1) 523 | """ 524 | Optional. LaTeX representation of a formula. Can be the same as 525 | `plain_text`. Example: "\frac{2}{x}=y". The plain text 526 | representation of this is available in Word.plain_text. 527 | """ 528 | 529 | 530 | @dataclass(eq=False, repr=False) 531 | class TextLayoutLine(betterproto.Message): 532 | words: List["TextLayoutWord"] = betterproto.message_field(1) 533 | """Optional. List of words in natural reading order.""" 534 | 535 | geometry: "Geometry" = betterproto.message_field(2) 536 | """Optional. The geometry of the line.""" 537 | 538 | 539 | @dataclass(eq=False, repr=False) 540 | class TextLayoutParagraph(betterproto.Message): 541 | id: "TextEntityIdentifier" = betterproto.message_field(1) 542 | """Required. Unique id within TextLayout.""" 543 | 544 | lines: List["TextLayoutLine"] = betterproto.message_field(2) 545 | """ 546 | Optional. List of lines in natural reading order (see also 547 | `writing_direction`). 
548 | """ 549 | 550 | geometry: "Geometry" = betterproto.message_field(3) 551 | """Optional. Geometry of the paragraph.""" 552 | 553 | writing_direction: "WritingDirection" = betterproto.enum_field(4) 554 | """Optional. The text writing direction (aka reading order).""" 555 | 556 | content_language: str = betterproto.string_field(5) 557 | """ 558 | Optional. BCP-47 language code of the dominant language in this 559 | paragraph. 560 | """ 561 | 562 | 563 | @dataclass(eq=False, repr=False) 564 | class TextEntityIdentifier(betterproto.Message): 565 | id: int = betterproto.int64_field(1) 566 | """ 567 | Required. Unique entity id used to reference (and match) text entities and 568 | ranges. 569 | """ 570 | 571 | 572 | @dataclass(eq=False, repr=False) 573 | class DeepGleamData(betterproto.Message): 574 | translation: "TranslationData" = betterproto.message_field( 575 | 10, group="rendering_oneof" 576 | ) 577 | visual_object_id: List[str] = betterproto.string_field(11) 578 | 579 | 580 | @dataclass(eq=False, repr=False) 581 | class TranslationData(betterproto.Message): 582 | status: "TranslationDataStatus" = betterproto.message_field(1) 583 | target_language: str = betterproto.string_field(2) 584 | source_language: str = betterproto.string_field(3) 585 | translation: str = betterproto.string_field(4) 586 | """The translated text.""" 587 | 588 | line: List["TranslationDataLine"] = betterproto.message_field(5) 589 | writing_direction: "WritingDirection" = betterproto.enum_field(7) 590 | """The original writing direction of the source text.""" 591 | 592 | alignment: "Alignment" = betterproto.enum_field(8) 593 | justified: bool = betterproto.bool_field(9) 594 | """Whether the text is justified.""" 595 | 596 | 597 | @dataclass(eq=False, repr=False) 598 | class TranslationDataStatus(betterproto.Message): 599 | code: "TranslationDataStatusCode" = betterproto.enum_field(1) 600 | 601 | 602 | @dataclass(eq=False, repr=False) 603 | class TranslationDataTextStyle(betterproto.Message): 604 | """ 605 | Style as the aggregation of the styles of the words in the original text. 606 | """ 607 | 608 | text_color: int = betterproto.uint32_field(1) 609 | """The foreground color of text in aRGB format.""" 610 | 611 | background_primary_color: int = betterproto.uint32_field(2) 612 | """The background color of text in aRGB format.""" 613 | 614 | 615 | @dataclass(eq=False, repr=False) 616 | class TranslationDataBackgroundImageData(betterproto.Message): 617 | """Properties of the image used to inpaint the source text.""" 618 | 619 | background_image: bytes = betterproto.bytes_field(1) 620 | """ 621 | Image bytes to inpaint the source text. Contains image bytes in the 622 | format specified in file_format. 623 | """ 624 | 625 | image_width: int = betterproto.int32_field(2) 626 | """Width of background_image in pixels.""" 627 | 628 | image_height: int = betterproto.int32_field(3) 629 | """Height of background_image in pixels.""" 630 | 631 | vertical_padding: float = betterproto.float_field(4) 632 | """ 633 | Vertical padding to apply to the text box before drawing the background 634 | image. Expressed as a fraction of the text box height, i.e. 1.0 means 635 | that the height should be doubled. Half of the padding should be added on 636 | the top and half on the bottom. 637 | """ 638 | 639 | horizontal_padding: float = betterproto.float_field(5) 640 | """ 641 | Horizontal padding to apply to the text box before drawing the background 642 | image. Expressed as a fraction of the text box height. 
Half of the 643 | padding should be added on the left and half on the right. 644 | """ 645 | 646 | file_format: "TranslationDataBackgroundImageDataFileFormat" = ( 647 | betterproto.enum_field(6) 648 | ) 649 | text_mask: bytes = betterproto.bytes_field(7) 650 | """Text mask for the generated background image.""" 651 | 652 | 653 | @dataclass(eq=False, repr=False) 654 | class TranslationDataLine(betterproto.Message): 655 | start: int = betterproto.int32_field(1) 656 | """ 657 | A substring from the translation from start to end (exclusive), 658 | that needs to be distributed on this line, measured in Unicode 659 | characters. If not set, the Line doesn't have any translation. 660 | """ 661 | 662 | end: int = betterproto.int32_field(2) 663 | style: "TranslationDataTextStyle" = betterproto.message_field(3) 664 | word: List["TranslationDataLineWord"] = betterproto.message_field(5) 665 | background_image_data: "TranslationDataBackgroundImageData" = ( 666 | betterproto.message_field(9) 667 | ) 668 | """Background image data is set only when inpainting is computed.""" 669 | 670 | 671 | @dataclass(eq=False, repr=False) 672 | class TranslationDataLineWord(betterproto.Message): 673 | start: int = betterproto.int32_field(1) 674 | """ 675 | A substring from the translation from start to end (exclusive), 676 | representing a word (without separator), measured in Unicode 677 | characters. 678 | """ 679 | 680 | end: int = betterproto.int32_field(2) 681 | 682 | 683 | @dataclass(eq=False, repr=False) 684 | class LensOverlayDocument(betterproto.Message): 685 | """ 686 | Top-level PDF representation extracted using Pdfium. 687 | Next ID: 6 688 | """ 689 | 690 | pages: List["Page"] = betterproto.message_field(1) 691 | """Ordered pdf pages.""" 692 | 693 | 694 | @dataclass(eq=False, repr=False) 695 | class Page(betterproto.Message): 696 | """ 697 | Represents a single page of the PDF. 698 | Next ID: 10 699 | """ 700 | 701 | page_number: int = betterproto.int32_field(1) 702 | """Page number in the pdf (indexed starting at 1).""" 703 | 704 | text_segments: List[str] = betterproto.string_field(4) 705 | """List of text segments of the page.""" 706 | 707 | 708 | @dataclass(eq=False, repr=False) 709 | class ClientImage(betterproto.Message): 710 | """Image data from the client.""" 711 | 712 | image_content: bytes = betterproto.bytes_field(1) 713 | """Required. A byte array encoding an image.""" 714 | 715 | 716 | @dataclass(eq=False, repr=False) 717 | class ImageCrop(betterproto.Message): 718 | """User-selected / auto-detected cropped image region.""" 719 | 720 | crop_id: str = betterproto.string_field(1) 721 | """The ID of the cropped image region.""" 722 | 723 | image: "ClientImage" = betterproto.message_field(2) 724 | """The image content of the cropped image region.""" 725 | 726 | zoomed_crop: "ZoomedCrop" = betterproto.message_field(3) 727 | """The zoomed crop properties of the cropped image region.""" 728 | 729 | 730 | @dataclass(eq=False, repr=False) 731 | class ImageData(betterproto.Message): 732 | """ 733 | Data representing image. Contains image bytes or image retrieval identifier. 734 | """ 735 | 736 | payload: "ImagePayload" = betterproto.message_field(1) 737 | """Image payload to process. This contains image bytes.""" 738 | 739 | image_metadata: "ImageMetadata" = betterproto.message_field(3) 740 | """Required. 
Context of the given image.""" 741 | 742 | significant_regions: List["Geometry"] = betterproto.message_field(4) 743 | """The bounds of significant regions in the image.""" 744 | 745 | 746 | @dataclass(eq=False, repr=False) 747 | class ImagePayload(betterproto.Message): 748 | image_bytes: bytes = betterproto.bytes_field(1) 749 | """Required. Image byte array.""" 750 | 751 | 752 | @dataclass(eq=False, repr=False) 753 | class ImageMetadata(betterproto.Message): 754 | width: int = betterproto.int32_field(1) 755 | """ 756 | Required. Image width in pixels. Should reflect the actual size of 757 | image_bytes. 758 | """ 759 | 760 | height: int = betterproto.int32_field(2) 761 | """ 762 | Required. Image height in pixels. Should reflect the actual size of 763 | image_bytes. 764 | """ 765 | 766 | 767 | @dataclass(eq=False, repr=False) 768 | class TextQuery(betterproto.Message): 769 | """Contains an unstructured text query to add to an image query.""" 770 | 771 | query: str = betterproto.string_field(1) 772 | """The unstructured text query, such as "blue" or "blouse".""" 773 | 774 | is_primary: bool = betterproto.bool_field(2) 775 | 776 | 777 | @dataclass(eq=False, repr=False) 778 | class LensOverlayInteractionRequestMetadata(betterproto.Message): 779 | """Metadata associated with an interaction request.""" 780 | 781 | type: "LensOverlayInteractionRequestMetadataType" = betterproto.enum_field(1) 782 | selection_metadata: "LensOverlayInteractionRequestMetadataSelectionMetadata" = ( 783 | betterproto.message_field(2) 784 | ) 785 | query_metadata: "LensOverlayInteractionRequestMetadataQueryMetadata" = ( 786 | betterproto.message_field(4) 787 | ) 788 | 789 | 790 | @dataclass(eq=False, repr=False) 791 | class LensOverlayInteractionRequestMetadataSelectionMetadata(betterproto.Message): 792 | """ 793 | Metadata related to the selection associated with this interaction request. 794 | """ 795 | 796 | point: "LensOverlayInteractionRequestMetadataSelectionMetadataPoint" = ( 797 | betterproto.message_field(1, group="selection") 798 | ) 799 | region: "LensOverlayInteractionRequestMetadataSelectionMetadataRegion" = ( 800 | betterproto.message_field(2, group="selection") 801 | ) 802 | object: "LensOverlayInteractionRequestMetadataSelectionMetadataObject" = ( 803 | betterproto.message_field(3, group="selection") 804 | ) 805 | 806 | 807 | @dataclass(eq=False, repr=False) 808 | class LensOverlayInteractionRequestMetadataSelectionMetadataPoint(betterproto.Message): 809 | x: float = betterproto.float_field(1) 810 | y: float = betterproto.float_field(2) 811 | 812 | 813 | @dataclass(eq=False, repr=False) 814 | class LensOverlayInteractionRequestMetadataSelectionMetadataRegion(betterproto.Message): 815 | region: "CenterRotatedBox" = betterproto.message_field(1) 816 | 817 | 818 | @dataclass(eq=False, repr=False) 819 | class LensOverlayInteractionRequestMetadataSelectionMetadataObject(betterproto.Message): 820 | object_id: str = betterproto.string_field(1) 821 | geometry: "Geometry" = betterproto.message_field(2) 822 | 823 | 824 | @dataclass(eq=False, repr=False) 825 | class LensOverlayInteractionRequestMetadataQueryMetadata(betterproto.Message): 826 | """Metadata related to query.""" 827 | 828 | text_query: "TextQuery" = betterproto.message_field(2) 829 | """The text query information.""" 830 | 831 | 832 | @dataclass(eq=False, repr=False) 833 | class TranslateStickinessSignals(betterproto.Message): 834 | """ 835 | Signals specific to queries coming from translate stickiness extension. 
836 | """ 837 | 838 | translate_suppress_echo_for_sticky: bool = betterproto.bool_field(1) 839 | 840 | 841 | @dataclass(eq=False, repr=False) 842 | class FunctionCall(betterproto.Message): 843 | """A message representing the function call of an answers intent query.""" 844 | 845 | name: str = betterproto.string_field(1) 846 | """Name of this function call.""" 847 | 848 | argument: List["Argument"] = betterproto.message_field(2) 849 | """A list of arguments of this function call.""" 850 | 851 | signals: "FunctionCallSignals" = betterproto.message_field(4) 852 | """Signals at the function call level""" 853 | 854 | 855 | @dataclass(eq=False, repr=False) 856 | class FunctionCallSignals(betterproto.Message): 857 | """Signals at the function call level""" 858 | 859 | translate_stickiness_signals: "TranslateStickinessSignals" = ( 860 | betterproto.message_field(311378150) 861 | ) 862 | 863 | 864 | @dataclass(eq=False, repr=False) 865 | class Argument(betterproto.Message): 866 | """A message representing the function argument.""" 867 | 868 | name: str = betterproto.string_field(1) 869 | """Name of this argument.""" 870 | 871 | value: "ArgumentValue" = betterproto.message_field(2) 872 | """The value of this argument.""" 873 | 874 | 875 | @dataclass(eq=False, repr=False) 876 | class ArgumentValue(betterproto.Message): 877 | """A message representing the value of an argument.""" 878 | 879 | simple_value: "SimpleValue" = betterproto.message_field(3, group="value") 880 | 881 | 882 | @dataclass(eq=False, repr=False) 883 | class SimpleValue(betterproto.Message): 884 | """A message representing a simple literal value.""" 885 | 886 | string_value: str = betterproto.string_field(1, group="value") 887 | 888 | 889 | @dataclass(eq=False, repr=False) 890 | class Query(betterproto.Message): 891 | """A Query is a representation of the meaning of the user query.""" 892 | 893 | intent_query: "FunctionCall" = betterproto.message_field(56249026) 894 | 895 | 896 | @dataclass(eq=False, repr=False) 897 | class MathSolverQuery(betterproto.Message): 898 | math_input_equation: str = betterproto.string_field(3) 899 | 900 | 901 | @dataclass(eq=False, repr=False) 902 | class MessageSet(betterproto.Message): 903 | """This is proto2's version of MessageSet.""" 904 | 905 | message_set_extension: "Query" = betterproto.message_field(41401449) 906 | 907 | 908 | @dataclass(eq=False, repr=False) 909 | class OverlayObject(betterproto.Message): 910 | """Overlay Object.""" 911 | 912 | id: str = betterproto.string_field(1) 913 | """The id.""" 914 | 915 | geometry: "Geometry" = betterproto.message_field(2) 916 | """The object geometry.""" 917 | 918 | rendering_metadata: "OverlayObjectRenderingMetadata" = betterproto.message_field(8) 919 | """The rendering metadata for the object.""" 920 | 921 | interaction_properties: "OverlayObjectInteractionProperties" = ( 922 | betterproto.message_field(4) 923 | ) 924 | is_fulfilled: bool = betterproto.bool_field(9) 925 | """ 926 | Indicates to the client that this object is eligible to be an object 927 | fulfillment request. 
928 | """ 929 | 930 | 931 | @dataclass(eq=False, repr=False) 932 | class OverlayObjectRenderingMetadata(betterproto.Message): 933 | """Rendering metadata for the object.""" 934 | 935 | render_type: "OverlayObjectRenderingMetadataRenderType" = betterproto.enum_field(1) 936 | 937 | 938 | @dataclass(eq=False, repr=False) 939 | class OverlayObjectInteractionProperties(betterproto.Message): 940 | select_on_tap: bool = betterproto.bool_field(1) 941 | """Whether an object can be tapped""" 942 | 943 | 944 | @dataclass(eq=False, repr=False) 945 | class LensOverlayRequestId(betterproto.Message): 946 | """ 947 | Request Id definition to support request sequencing and state lookup. 948 | """ 949 | 950 | uuid: int = betterproto.uint64_field(1) 951 | """A unique identifier for a sequence of related Lens requests.""" 952 | 953 | sequence_id: int = betterproto.int32_field(2) 954 | """ 955 | An id to indicate the order of the current request within a sequence of 956 | requests sharing the same uuid. Starts from 1, increments by 1 if there is 957 | a new request with the same uuid. 958 | """ 959 | 960 | image_sequence_id: int = betterproto.int32_field(3) 961 | """ 962 | An id to indicate the order of image payload sent within a sequence of 963 | requests sharing the same uuid. Starts from 1, increments by 1 if there is 964 | a new request with an image payload with the same uuid. 965 | Note, region search request does not increment this id. 966 | """ 967 | 968 | analytics_id: bytes = betterproto.bytes_field(4) 969 | """ 970 | Analytics ID for the Lens request. Will be updated on the initial request 971 | and once per interaction request. 972 | """ 973 | 974 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(6) 975 | """Information about where to route the request.""" 976 | 977 | 978 | @dataclass(eq=False, repr=False) 979 | class LensOverlayRequestContext(betterproto.Message): 980 | """Request context for a Lens Overlay request.""" 981 | 982 | request_id: "LensOverlayRequestId" = betterproto.message_field(3) 983 | """Required. Identifiers for this request.""" 984 | 985 | client_context: "LensOverlayClientContext" = betterproto.message_field(4) 986 | """The client context for the request.""" 987 | 988 | 989 | @dataclass(eq=False, repr=False) 990 | class LensOverlayObjectsRequest(betterproto.Message): 991 | request_context: "LensOverlayRequestContext" = betterproto.message_field(1) 992 | """Required. Basic information and context for the request.""" 993 | 994 | image_data: "ImageData" = betterproto.message_field(3) 995 | """Required. Image Data to process.""" 996 | 997 | payload: "Payload" = betterproto.message_field(4) 998 | """ 999 | Optional. Data payload of the request. 1000 | TODO(b/359638436): Mark required when clients have migrated to use Payload 1001 | field. 
1002 | """ 1003 | 1004 | 1005 | @dataclass(eq=False, repr=False) 1006 | class LensOverlayObjectsResponse(betterproto.Message): 1007 | overlay_objects: List["OverlayObject"] = betterproto.message_field(2) 1008 | """Overlay objects.""" 1009 | 1010 | text: "Text" = betterproto.message_field(3) 1011 | """Text.""" 1012 | 1013 | deep_gleams: List["DeepGleamData"] = betterproto.message_field(4) 1014 | """Gleams.""" 1015 | 1016 | cluster_info: "LensOverlayClusterInfo" = betterproto.message_field(7) 1017 | """The cluster info.""" 1018 | 1019 | 1020 | @dataclass(eq=False, repr=False) 1021 | class LensOverlayInteractionRequest(betterproto.Message): 1022 | request_context: "LensOverlayRequestContext" = betterproto.message_field(1) 1023 | """Basic information and context for the request.""" 1024 | 1025 | interaction_request_metadata: "LensOverlayInteractionRequestMetadata" = ( 1026 | betterproto.message_field(2) 1027 | ) 1028 | """Metadata associated with an interaction request.""" 1029 | 1030 | image_crop: "ImageCrop" = betterproto.message_field(3) 1031 | """The image crop data.""" 1032 | 1033 | 1034 | @dataclass(eq=False, repr=False) 1035 | class LensOverlayInteractionResponse(betterproto.Message): 1036 | encoded_response: str = betterproto.string_field(3) 1037 | 1038 | 1039 | @dataclass(eq=False, repr=False) 1040 | class Payload(betterproto.Message): 1041 | """Next ID: 9""" 1042 | 1043 | request_type: "PayloadRequestType" = betterproto.enum_field(6) 1044 | """Optional. The type of the request.""" 1045 | 1046 | image_data: "ImageData" = betterproto.message_field(2) 1047 | """ 1048 | Currently unset, use image_data in ObjectsRequest. 1049 | TODO(b/359638436): Move ObjectsRequest clients onto Payload.ImageData. 1050 | """ 1051 | 1052 | content_data: bytes = betterproto.bytes_field(3) 1053 | """ 1054 | Data for non-image payloads. May be sent with or without an image in the 1055 | image_data field. If content_data is set, content_type must also be set. 1056 | """ 1057 | 1058 | content_type: str = betterproto.string_field(4) 1059 | """ 1060 | The media type/MIME type of the data represented i content_data, e.g. 1061 | "application/pdf". If content_type is set, content_data should also be set. 1062 | """ 1063 | 1064 | page_url: str = betterproto.string_field(5) 1065 | """The page url this request was made on.""" 1066 | 1067 | partial_pdf_document: "LensOverlayDocument" = betterproto.message_field(7) 1068 | """ 1069 | The partially parsed PDF document. Used to get early suggest signals. This 1070 | is only set for REQUEST_TYPE_EARLY_PARTIAL_PDF. 1071 | """ 1072 | 1073 | compression_type: "PayloadCompressionType" = betterproto.enum_field(8) 1074 | """ 1075 | Compression format of content_data. Currently only used for PDF data. 1076 | """ 1077 | 1078 | 1079 | @dataclass(eq=False, repr=False) 1080 | class LensOverlayServerClusterInfoRequest(betterproto.Message): 1081 | """The cluster info request for a Lens Overlay session.""" 1082 | 1083 | enable_search_session_id: bool = betterproto.bool_field(1) 1084 | """ 1085 | Whether to return a search session id alongside the server session id. 
1086 | """ 1087 | 1088 | 1089 | @dataclass(eq=False, repr=False) 1090 | class LensOverlayServerClusterInfoResponse(betterproto.Message): 1091 | server_session_id: str = betterproto.string_field(1) 1092 | """ID for subsequent server requests.""" 1093 | 1094 | search_session_id: str = betterproto.string_field(2) 1095 | """ID for subsequent search requests.""" 1096 | 1097 | routing_info: "LensOverlayRoutingInfo" = betterproto.message_field(3) 1098 | """The routing info for the server session.""" 1099 | 1100 | 1101 | @dataclass(eq=False, repr=False) 1102 | class LensOverlayServerError(betterproto.Message): 1103 | """ 1104 | An error encountered while handling a request. 1105 | Next ID: 2 1106 | """ 1107 | 1108 | error_type: "LensOverlayServerErrorErrorType" = betterproto.enum_field(1) 1109 | """The error type.""" 1110 | 1111 | 1112 | @dataclass(eq=False, repr=False) 1113 | class LensOverlayServerRequest(betterproto.Message): 1114 | """Next ID: 4""" 1115 | 1116 | objects_request: "LensOverlayObjectsRequest" = betterproto.message_field(1) 1117 | """Options for fetching objects.""" 1118 | 1119 | interaction_request: "LensOverlayInteractionRequest" = betterproto.message_field(2) 1120 | """Options for fetching interactions.""" 1121 | 1122 | client_logs: "LensOverlayClientLogs" = betterproto.message_field(3) 1123 | """Client logs for the request.""" 1124 | 1125 | 1126 | @dataclass(eq=False, repr=False) 1127 | class LensOverlayServerResponse(betterproto.Message): 1128 | """ 1129 | Response details for an LensOverlay request. 1130 | Next ID: 4 1131 | """ 1132 | 1133 | error: "LensOverlayServerError" = betterproto.message_field(1) 1134 | """The encountered error.""" 1135 | 1136 | objects_response: "LensOverlayObjectsResponse" = betterproto.message_field(2) 1137 | """The objects response.""" 1138 | 1139 | interaction_response: "LensOverlayInteractionResponse" = betterproto.message_field( 1140 | 3 1141 | ) 1142 | """The interaction response.""" 1143 | 1144 | 1145 | @dataclass(eq=False, repr=False) 1146 | class StickinessSignals(betterproto.Message): 1147 | id_namespace: "StickinessSignalsNamespace" = betterproto.enum_field(1) 1148 | interpretation: "MessageSet" = betterproto.message_field(28) 1149 | education_input_extension: "EducationInputExtension" = betterproto.message_field( 1150 | 121 1151 | ) 1152 | 1153 | 1154 | @dataclass(eq=False, repr=False) 1155 | class EducationInputExtension(betterproto.Message): 1156 | math_solver_query: "MathSolverQuery" = betterproto.message_field(1) 1157 | 1158 | 1159 | @dataclass(eq=False, repr=False) 1160 | class LensOverlayVideoContextInputParams(betterproto.Message): 1161 | url: str = betterproto.string_field(1) 1162 | """Url of the video.""" 1163 | 1164 | 1165 | @dataclass(eq=False, repr=False) 1166 | class LensOverlayVideoParams(betterproto.Message): 1167 | video_context_input_params: "LensOverlayVideoContextInputParams" = ( 1168 | betterproto.message_field(1) 1169 | ) 1170 | """Video context params from input.""" 1171 | 1172 | 1173 | @dataclass(eq=False, repr=False) 1174 | class LensOverlayVisualSearchInteractionLogData(betterproto.Message): 1175 | """Log data for a Lens Overlay visual search interaction.""" 1176 | 1177 | filter_data: "FilterData" = betterproto.message_field(1) 1178 | """Filter related metadata.""" 1179 | 1180 | user_selection_data: "UserSelectionData" = betterproto.message_field(2) 1181 | """User Selection metadata.""" 1182 | 1183 | is_parent_query: bool = betterproto.bool_field(3) 1184 | """Whether the query is a parent query.""" 1185 | 1186 | 
client_platform: "ClientPlatform" = betterproto.enum_field(4) 1187 | """The client platform this query originated from.""" 1188 | 1189 | 1190 | @dataclass(eq=False, repr=False) 1191 | class FilterData(betterproto.Message): 1192 | """ 1193 | Filter data. 1194 | Next ID: 2 1195 | """ 1196 | 1197 | filter_type: "LensOverlayFilterType" = betterproto.enum_field(1) 1198 | """ 1199 | The filter type associated with this interaction (auto, translate, etc.). 1200 | """ 1201 | 1202 | 1203 | @dataclass(eq=False, repr=False) 1204 | class UserSelectionData(betterproto.Message): 1205 | """ 1206 | User selection data. 1207 | Next ID: 2 1208 | """ 1209 | 1210 | selection_type: "LensOverlaySelectionType" = betterproto.enum_field(1) 1211 | """ 1212 | The selection type associated with this interaction (e.g. region search). 1213 | """ 1214 | 1215 | 1216 | @dataclass(eq=False, repr=False) 1217 | class LensOverlayVisualSearchInteractionData(betterproto.Message): 1218 | """Metadata associated with a Lens Visual Search request.""" 1219 | 1220 | interaction_type: "LensOverlayInteractionRequestMetadataType" = ( 1221 | betterproto.enum_field(1) 1222 | ) 1223 | """The type of interaction.""" 1224 | 1225 | zoomed_crop: "ZoomedCrop" = betterproto.message_field(7) 1226 | """The selected region for this interaction, instead of the object id.""" 1227 | 1228 | object_id: str = betterproto.string_field(3) 1229 | """ 1230 | The selected object id for this interaction, instead of the zoomed crop. 1231 | Currently unsupported and should not be populated. 1232 | """ 1233 | 1234 | log_data: "LensOverlayVisualSearchInteractionLogData" = betterproto.message_field(5) 1235 | """Logging-specific data.""" 1236 | --------------------------------------------------------------------------------
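
The sketch below is illustrative only and not part of the generated file or the repository: it shows how the messages defined in lens_betterproto.py nest into a single serialized LensOverlayServerRequest. Every concrete value is an assumption for demonstration; the library's actual request assembly presumably lives in core/protobuf_builder.py and core/request_handler.py.

# Illustrative sketch: assumed usage of the generated betterproto messages.
import os

from chrome_lens_py.utils.lens_betterproto import (
    ImageData,
    ImageMetadata,
    ImagePayload,
    LensOverlayObjectsRequest,
    LensOverlayRequestContext,
    LensOverlayRequestId,
    LensOverlayServerRequest,
)

request = LensOverlayServerRequest(
    objects_request=LensOverlayObjectsRequest(
        request_context=LensOverlayRequestContext(
            request_id=LensOverlayRequestId(
                uuid=int.from_bytes(os.urandom(8), "big"),  # random id shared by a request sequence
                sequence_id=1,        # first request with this uuid
                image_sequence_id=1,  # first image payload with this uuid
            ),
        ),
        image_data=ImageData(
            payload=ImagePayload(image_bytes=b"<image bytes>"),  # placeholder, not real data
            # Per the ImageMetadata docstrings, width/height must reflect
            # the actual pixel size of image_bytes.
            image_metadata=ImageMetadata(width=1500, height=1000),
        ),
    ),
)

wire_bytes = bytes(request)  # betterproto messages serialize via bytes(...)

The resulting wire_bytes are what a client would send as an application/x-protobuf request body; the reply can be decoded with betterproto's parse method, e.g. LensOverlayServerResponse().parse(response_bytes).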