├── gaze_ocr ├── __init__.py ├── dragonfly.py ├── eye_tracking.py ├── talon_adapter.py └── _gaze_ocr.py ├── .git-blame-ignore-revs ├── .gitattributes ├── .pre-commit-config.yaml ├── .github └── workflows │ ├── test.yml │ └── publish.yml ├── tests └── gaze_ocr_test.py ├── pyproject.toml ├── .gitignore ├── README.md └── LICENSE /gaze_ocr/__init__.py: -------------------------------------------------------------------------------- 1 | from ._gaze_ocr import * # noqa: F403 2 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Migrate code style to Black 2 | b423c2f49bd59e45c499d728b79143e4a5c63151 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.py text 7 | *.txt text 8 | *.rst text 9 | *.md text 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/astral-sh/ruff-pre-commit 5 | # Ruff version. 6 | rev: v0.12.1 7 | hooks: 8 | # Run the linter. 9 | - id: ruff-check 10 | args: [ --fix ] 11 | # Run the formatter. 
def test_distance_squared():
    """Exercise _distance_squared on integer and floating-point point pairs."""
    # (first point, second point, expected squared distance)
    exact_cases = [
        # Same point
        ((0, 0), (0, 0), 0),
        # Horizontal distance
        ((0, 0), (3, 0), 9),
        # Vertical distance
        ((0, 0), (0, 4), 16),
        # Diagonal distance (3-4-5 triangle)
        ((0, 0), (3, 4), 25),
        # Negative coordinates
        ((-1, -1), (2, 3), 25),
    ]
    for first, second, expected in exact_cases:
        # Every pair is checked with the arguments in both orders.
        assert _distance_squared(first, second) == expected
        assert _distance_squared(second, first) == expected

    # Floating point coordinates are compared with a small tolerance.
    float_result = _distance_squared((1.5, 2.5), (4.5, 6.5))
    assert abs(float_result - 25.0) < 1e-10
release: 4 | types: [created] 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.11", "3.12"] 15 | steps: 16 | - uses: actions/checkout@v5 17 | - name: Install uv 18 | uses: astral-sh/setup-uv@v6 19 | - name: Set up Python ${{ matrix.python-version }} 20 | run: uv python install ${{ matrix.python-version }} 21 | - name: Run tests 22 | run: uv run --no-sources pytest 23 | deploy: 24 | runs-on: ubuntu-latest 25 | needs: [test] 26 | environment: release 27 | permissions: 28 | id-token: write 29 | steps: 30 | - uses: actions/checkout@v5 31 | - name: Install uv 32 | uses: astral-sh/setup-uv@v6 33 | - name: Set up Python 34 | run: uv python install 35 | - name: Build 36 | run: uv build --no-sources 37 | - name: Publish 38 | uses: pypa/gh-action-pypi-publish@release/v1 39 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "gaze-ocr" 3 | version = "0.5.0" 4 | description = "Library for applying OCR to where the user is looking." 
5 | readme = "README.md" 6 | authors = [{ name = "James Stout" }] 7 | requires-python = ">=3.11" 8 | dependencies = ["screen-ocr"] 9 | classifiers = [ 10 | "Programming Language :: Python :: 3", 11 | "License :: OSI Approved :: Apache Software License", 12 | "Operating System :: OS Independent", 13 | ] 14 | 15 | [project.optional-dependencies] 16 | dragonfly = ["dragonfly2", "pythonnet"] 17 | 18 | [project.urls] 19 | Homepage = "https://github.com/wolfmanstout/gaze-ocr" 20 | 21 | [build-system] 22 | requires = ["hatchling"] 23 | build-backend = "hatchling.build" 24 | 25 | [tool.uv.sources] 26 | screen-ocr = { path = "../screen_ocr", editable = true } 27 | 28 | [tool.ruff.lint] 29 | select = [ 30 | # pycodestyle 31 | "E", 32 | # Pyflakes 33 | "F", 34 | # pyupgrade 35 | "UP", 36 | # flake8-bugbear 37 | "B", 38 | # flake8-simplify 39 | "SIM", 40 | # isort 41 | "I", 42 | ] 43 | ignore = ["E501", "SIM105", "SIM116", "UP045"] 44 | 45 | [dependency-groups] 46 | dev = ["pytest>=8.4.1"] 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # VSCode settings 132 | .vscode/settings.json 133 | .claude/settings.local.json 134 | -------------------------------------------------------------------------------- /gaze_ocr/dragonfly.py: -------------------------------------------------------------------------------- 1 | # Avoid type checker confusion with the module name matching this file name. 2 | # A cleaner fix would be to rename this file, but that would be a breaking change. 
class Keyboard:
    """Keyboard backend that sends keystrokes through Dragonfly ``Key`` actions.

    The instance also remembers whether it last pressed or released shift via
    shift_down()/shift_up().
    """

    def __init__(self):
        # Only updated by shift_down() and shift_up() on this instance.
        self._shift = False

    @staticmethod
    def _send(key_spec):
        """Build a Dragonfly ``Key`` action from ``key_spec`` and execute it."""
        dragonfly.Key(key_spec).execute()

    def shift_down(self):
        """Press shift and record it as held."""
        self._send("shift:down")
        self._shift = True

    def shift_up(self):
        """Release shift and record it as released."""
        self._send("shift:up")
        self._shift = False

    def is_shift_down(self):
        """Return the shift state recorded by shift_down()/shift_up()."""
        return self._shift

    def left(self, n=1):
        """Send the left-arrow key spec with ``n`` as its repeat field."""
        self._send("left:{}".format(n))

    def right(self, n=1):
        """Send the right-arrow key spec with ``n`` as its repeat field."""
        self._send("right:{}".format(n))
class MoveTextCursorAction(dragonfly.ActionBase):
    """Dragonfly action that asks the controller to move the text cursor to a word.

    ``word`` is used as a %-format template: when ``_execute`` receives a
    non-empty ``data`` mapping, the template is filled in from it.
    """

    def __init__(self, controller, word, cursor_position="middle", *args, **kwargs):
        """Store the controller, word template, and cursor position.

        Any extra positional/keyword arguments are forwarded to
        ``dragonfly.ActionBase``.
        """
        self.controller = controller
        self.word = word
        self.cursor_position = cursor_position
        super().__init__(*args, **kwargs)

    def _execute(self, data=None):
        """Move the text cursor; return the controller's result, or False if it is falsy."""
        target_word = self.word % data if data else self.word
        # On Windows, works best if cursor is slightly offset to the right.
        outcome = self.controller.move_text_cursor_to_word(
            target_word, self.cursor_position, click_offset_right=1
        )
        return outcome if outcome else False
108 | return ( 109 | self.controller.select_text( 110 | dynamic_start_word, 111 | dynamic_end_word, 112 | for_deletion=self.for_deletion, 113 | click_offset_right=1, 114 | ) 115 | or False 116 | ) 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gaze-ocr 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/gaze-ocr.svg)](https://pypi.org/project/gaze-ocr/) 4 | [![Changelog](https://img.shields.io/github/v/release/wolfmanstout/gaze-ocr?include_prereleases&label=changelog)](https://github.com/wolfmanstout/gaze-ocr/releases) 5 | [![Tests](https://github.com/wolfmanstout/gaze-ocr/actions/workflows/test.yml/badge.svg)](https://github.com/wolfmanstout/gaze-ocr/actions/workflows/test.yml) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/wolfmanstout/gaze-ocr/blob/master/LICENSE) 7 | 8 | The `gaze-ocr` package makes it easy to apply OCR to wherever the user is looking. 9 | This library is designed for voice control. The following instructions are for 10 | use with Dragonfly. For Talon, see 11 | https://github.com/wolfmanstout/talon-gaze-ocr. See handsfreecoding.org for more 12 | information about this package. 13 | 14 | ## Installation 15 | 16 | 1. Follow [instructions for installing 17 | screen-ocr](https://github.com/wolfmanstout/screen-ocr). 18 | 2. Download the [latest 19 | Tobii.Interaction](https://www.nuget.org/packages/Tobii.Interaction/) package 20 | from NuGet (these instructions have been tested on 0.7.3). 21 | 3. Rename the file extension to .zip and expand the contents. 22 | 4. Copy these 3 DLLs to a directory of your choice: 23 | build/AnyCPU/Tobii.EyeX.Client.dll, lib/net45/Tobii.Interaction.Model.dll, 24 | lib/net45/Tobii.Interaction.Net.dll. 25 | 5. 
Ensure that the files are not blocked (right-click Properties, and if there 26 | is a "Security" section at the bottom, check the "Unblock" box.) 27 | 6. `pip install gaze-ocr[dragonfly]` 28 | 29 | ## Usage 30 | 31 | Provide the path to the DLL directory when constructing an EyeTracker instance. 32 | 33 | Sample Dragonfly grammar: 34 | 35 | ```python 36 | import gaze_ocr 37 | import gaze_ocr.dragonfly 38 | import gaze_ocr.eye_tracking 39 | import screen_ocr 40 | 41 | from dragonfly import ( 42 | Dictation, 43 | Grammar, 44 | Key, 45 | MappingRule, 46 | Mouse, 47 | Text 48 | ) 49 | 50 | # See installation instructions: 51 | # https://github.com/wolfmanstout/gaze-ocr 52 | DLL_DIRECTORY = "c:/Users/james/Downloads/tobii.interaction.0.7.3/" 53 | 54 | # Initialize eye tracking and OCR. 55 | tracker = gaze_ocr.eye_tracking.EyeTracker.get_connected_instance(DLL_DIRECTORY, 56 | mouse=gaze_ocr.dragonfly.Mouse(), 57 | keyboard=gaze_ocr.dragonfly.Keyboard(), 58 | windows=gaze_ocr.dragonfly.Windows()) 59 | ocr_reader = screen_ocr.Reader.create_fast_reader() 60 | gaze_ocr_controller = gaze_ocr.Controller(ocr_reader, 61 | tracker, 62 | mouse=gaze_ocr.dragonfly.Mouse(), 63 | keyboard=gaze_ocr.dragonfly.Keyboard()) 64 | 65 | 66 | class CommandRule(MappingRule): 67 | mapping = { 68 | # Click on text. 69 | "<text> click": gaze_ocr.dragonfly.MoveCursorToWordAction(gaze_ocr_controller, "%(text)s") + Mouse("left"), 70 | 71 | # Move the cursor for text editing. 72 | "go before <text>": gaze_ocr.dragonfly.MoveTextCursorAction(gaze_ocr_controller, "%(text)s", "before"), 73 | "go after <text>": gaze_ocr.dragonfly.MoveTextCursorAction(gaze_ocr_controller, "%(text)s", "after"), 74 | 75 | # Select text starting from the current position. 
class EyeTracker:
    """Tobii eye tracker wrapper built on the Tobii.Interaction .NET assemblies.

    If the ``clr`` bridge or the Tobii assemblies cannot be imported, the
    instance runs as a mock: ``is_mock`` is True, ``connect()`` does nothing,
    and no gaze point is ever reported, so ``get_gaze_point_or_default()``
    falls back to the foreground window center.
    """

    # Shared instance managed by get_connected_instance().
    _instance = None

    @classmethod
    def get_connected_instance(cls, *args, **kwargs):
        """Return the shared instance, creating and connecting it on demand.

        The arguments are forwarded to the constructor only when the shared
        instance does not exist yet; later calls ignore them.
        """
        if cls._instance is None:
            cls._instance = cls(*args, **kwargs)
        if not cls._instance.is_connected:
            cls._instance.connect()
        return cls._instance

    def __init__(
        self,
        tobii_dll_directory,
        mouse=None,
        keyboard=None,
        windows=None,
    ):
        """Store the input backends and try to load the Tobii libraries.

        Args:
            tobii_dll_directory: Directory appended to ``sys.path`` so the
                Tobii assemblies can be referenced.
            mouse: Object providing ``move((x, y))``.
            keyboard: Keyboard backend (stored, not used by this class).
            windows: Object providing ``get_monitor_size()`` and
                ``get_foreground_window_center()``.

        Raises:
            RuntimeError: If mouse, keyboard, or windows is missing.
        """
        if not mouse or not keyboard or not windows:
            raise RuntimeError(
                "Must provide keyboard, mouse, and windows implementation. "
                "Import gaze_ocr.dragonfly or gaze_ocr.talon and use Mouse(), Keyboard(), and Windows()"
            )
        self._mouse = mouse
        self._keyboard = keyboard
        self._windows = windows
        # Attempt to load eye tracker DLLs. The imported names are published
        # as module globals because connect() and has_gaze_point() use them.
        global clr, Action, Double, Host, GazeTracking
        try:
            import clr
            from System import Action, Double

            # Do not stack duplicate entries if constructed more than once.
            if tobii_dll_directory not in sys.path:
                sys.path.append(tobii_dll_directory)
            clr.AddReference("Tobii.Interaction.Model")
            clr.AddReference("Tobii.Interaction.Net")
            from Tobii.Interaction import Host
            from Tobii.Interaction.Framework import GazeTracking

            self.is_mock = False
        except Exception:
            # Deliberate best effort: any load failure degrades to mock mode.
            print("Eye tracking libraries are unavailable.")
            self.is_mock = True
        self._host = None
        self._gaze_point = None
        self._gaze_state = None
        self._screen_scale = (1.0, 1.0)
        self._monitor_size = windows.get_monitor_size()
        self._head_rotation = None
        # Initialised here so the attribute exists before the first head-pose
        # event arrives (previously it was only created inside the handler).
        self._head_position = None
        self.is_connected = False

    def connect(self):
        """Create the Tobii host and subscribe to its states and streams.

        Does nothing in mock mode.
        """
        if self.is_mock:
            return
        self._host = Host()

        # Connect handlers.
        screen_bounds_state = self._host.States.CreateScreenBoundsObserver()
        screen_bounds_state.Changed += self._handle_screen_bounds
        gaze_state = self._host.States.CreateGazeTrackingObserver()
        gaze_state.Changed += self._handle_gaze_state
        gaze_points = self._host.Streams.CreateGazePointDataStream()
        action = Action[Double, Double, Double](self._handle_gaze_point)
        gaze_points.GazePoint(action)
        head_pose = self._host.Streams.CreateHeadPoseStream()
        head_pose.Next += self._handle_head_pose
        self.is_connected = True
        print("Eye tracker connected.")

    def disconnect(self):
        """Disable the host connection and clear the cached gaze data."""
        if not self.is_connected:
            return
        assert self._host is not None
        self._host.DisableConnection()
        self._host = None
        self._gaze_point = None
        self._gaze_state = None
        self.is_connected = False
        print("Eye tracker disconnected.")

    def _handle_screen_bounds(self, sender, state):
        """Recompute the tracker-to-monitor scale from new screen bounds."""
        if not state.IsValid:
            print("Ignoring invalid screen bounds.")
            return
        bounds = state.Value
        width = float(bounds.Width)
        height = float(bounds.Height)
        # Non-positive bounds would make the scale division fail (or produce
        # a meaningless scale), so they are treated like invalid bounds.
        if width <= 0 or height <= 0:
            print("Ignoring invalid screen bounds.")
            return
        monitor_size = self._windows.get_monitor_size()
        self._screen_scale = (
            monitor_size[0] / width,
            monitor_size[1] / height,
        )
        self._monitor_size = monitor_size

    def _handle_gaze_state(self, sender, state):
        """Cache the latest valid gaze-tracking state."""
        if not state.IsValid:
            print("Ignoring invalid gaze state.")
            return
        self._gaze_state = state.Value

    def _handle_gaze_point(self, x, y, timestamp):
        """Cache the latest gaze sample as ``(x, y, timestamp)``."""
        self._gaze_point = (x, y, timestamp)

    def _handle_head_pose(self, sender, stream_data):
        """Cache the latest head rotation and position as ``(X, Y, Z)`` tuples."""
        pose = stream_data.Data
        self._head_rotation = (
            pose.HeadRotation.X,
            pose.HeadRotation.Y,
            pose.HeadRotation.Z,
        )
        self._head_position = (
            pose.HeadPosition.X,
            pose.HeadPosition.Y,
            pose.HeadPosition.Z,
        )

    def has_gaze_point(self):
        """Return True when gaze is being tracked and a sample has been cached.

        Always returns a real bool (never the cached sample itself).
        """
        return bool(
            not self.is_mock
            and self._gaze_state == GazeTracking.GazeTracked
            and self._gaze_point
        )

    def get_gaze_point(self):
        """Return the cached gaze point scaled by the screen scale, or None."""
        if not self.has_gaze_point():
            return None
        assert self._gaze_point is not None
        assert self._screen_scale is not None
        return (
            self._gaze_point[0] * self._screen_scale[0],
            self._gaze_point[1] * self._screen_scale[1],
        )

    def get_gaze_point_or_default(self):
        """Return the gaze point, or the foreground window center if unavailable."""
        return self.get_gaze_point() or self._windows.get_foreground_window_center()

    def get_monitor_size(self):
        """Return the monitor size last read from the windows backend."""
        return self._monitor_size

    def move_to_gaze_point(self, offset=(0, 0)):
        """Move the mouse to the gaze point (or default) plus ``offset``.

        Coordinates are truncated to ints and clamped so they are never
        negative.
        """
        gaze = self.get_gaze_point_or_default()
        x = max(0, int(gaze[0]) + offset[0])
        y = max(0, int(gaze[1]) + offset[1])
        self._mouse.move((x, y))
class TalonEyeTracker:
    """Eye tracker backed by Talon's gaze stream, with a bounded frame history."""

    # A gaze frame older than this (relative to time.perf_counter()) is
    # treated as stale by has_gaze_point().
    STALE_GAZE_THRESHOLD_SECONDS = 0.1
    # Tolerance applied on both sides of a requested time range.
    _TIME_RANGE_SLACK_SECONDS = 0.1

    def __init__(self):
        """Create the frame history and register for gaze frames immediately."""
        # Keep approximately 10 seconds of frames on Tobii 5
        self._queue = deque(maxlen=1000)
        self.is_connected = False
        self.connect()

    def _on_gaze(self, frame: tobii.GazeFrame):
        """Append a gaze frame to the history, skipping empty frames."""
        if not frame or not frame.gaze:
            return
        self._queue.append(frame)

    def connect(self):
        """Register the gaze callback; no-op when already connected."""
        if self.is_connected:
            return
        # !!! Using unstable private API that may break at any time !!!
        tracking_system.register("gaze", self._on_gaze)
        self.is_connected = True

    def disconnect(self):
        """Unregister the gaze callback; no-op when not connected."""
        if not self.is_connected:
            return
        # !!! Using unstable private API that may break at any time !!!
        tracking_system.unregister("gaze", self._on_gaze)
        self.is_connected = False

    def has_gaze_point(self):
        """Return True when the newest frame is younger than the stale threshold."""
        if not self._queue:
            return False
        return (
            self._queue[-1].ts > time.perf_counter() - self.STALE_GAZE_THRESHOLD_SECONDS
        )

    def get_gaze_point(self):
        """Return the newest gaze position in pixels, or None if it is stale or missing."""
        if not self.has_gaze_point():
            return None
        return self._gaze_to_pixels(self._queue[-1].gaze)

    def get_gaze_point_or_default(self):
        """Return the gaze point, or the active window's center if unavailable."""
        return self.get_gaze_point() or tuple(ui.active_window().rect.center)

    def get_gaze_bounds_during_time_range(self, start_timestamp, end_timestamp):
        """Return the pixel bounding box of gaze frames within a time range.

        Frames are located by binary search on their ``ts`` and kept only if
        they fall within the range widened by a small tolerance on each side.

        Returns:
            A BoundingBox, or None when there is no history or no frame falls
            inside the (widened) range.
        """
        # Work on a list snapshot: list indexing is O(1) whereas deque
        # indexing is O(n) away from the ends, and the indices computed below
        # stay valid even if _on_gaze appends meanwhile (whether that can
        # happen depends on Talon's callback threading, not visible here).
        frames = list(self._queue)
        if not frames:
            print("No gaze history available")
            return None
        last_index = len(frames) - 1
        # bisect may return len(frames); clamp so the last frame is considered.
        start_index = min(
            bisect.bisect_left(frames, start_timestamp, key=lambda f: f.ts),
            last_index,
        )
        end_index = min(
            bisect.bisect_left(frames, end_timestamp, key=lambda f: f.ts),
            last_index,
        )
        earliest = start_timestamp - self._TIME_RANGE_SLACK_SECONDS
        latest = end_timestamp + self._TIME_RANGE_SLACK_SECONDS
        in_range = [
            frame
            for frame in frames[start_index : end_index + 1]
            if not (frame.ts < earliest or frame.ts > latest)
        ]
        if not in_range:
            return None
        left = min(frame.gaze.x for frame in in_range)
        right = max(frame.gaze.x for frame in in_range)
        top = min(frame.gaze.y for frame in in_range)
        bottom = max(frame.gaze.y for frame in in_range)
        top_left = self._gaze_to_pixels(Point2d(x=left, y=top))
        bottom_right = self._gaze_to_pixels(Point2d(x=right, y=bottom))
        return BoundingBox(
            left=top_left[0],
            top=top_left[1],
            right=bottom_right[0],
            bottom=bottom_right[1],
        )

    @staticmethod
    def _gaze_to_pixels(gaze):
        """Map a gaze point onto the main screen rect, clamped to that rect."""
        rect = ui.main_screen().rect
        pos = rect.pos + gaze * rect.size
        pos = rect.clamp(pos)
        return (pos.x, pos.y)

    def move_to_gaze_point(self, offset=(0, 0)):
        """Move the mouse to the gaze point (or default) shifted by ``offset``."""
        gaze = self.get_gaze_point_or_default()
        x = gaze[0] + offset[0]
        y = gaze[1] + offset[1]
        actions.mouse_move(x, y)
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /gaze_ocr/_gaze_ocr.py: -------------------------------------------------------------------------------- 1 | """Library for manipulating on-screen text using gaze tracking and OCR. 2 | 3 | Supports disambiguation using Python generators to stop and resume computation. The *_generator 4 | functions return a generator which can be started with next(generator), which will return a list of 5 | matches if disambiguation is needed. Resume computation with generator.send(match). When computation 6 | completes, next() or send() will raise StopIteration with the .value set to the return value. 
@dataclass
class CursorLocation:
    """A planned click target plus the keyboard adjustments applied after clicking.

    The mouse, keyboard and app_actions collaborators are stored on the instance
    but are left out of repr() and of equality comparison.
    """

    base_coordinates: tuple[int, int]
    visual_coordinates: tuple[int, int]
    # True sends keyboard.right() after the click, False sends keyboard.left().
    move_cursor_right: bool
    # Count passed to keyboard.right()/left(); a falsy value skips the move.
    move_distance: int
    move_past_whitespace_left: bool
    move_past_whitespace_right: bool
    text_height: int
    # Zero-argument callable returning the X offset for the click. It is only
    # called after the window has been focused; None means no offset.
    click_offset_right: Optional[Callable[[], int]]

    mouse: Any = field(repr=False, compare=False)
    keyboard: Any = field(repr=False, compare=False)
    app_actions: Any = field(repr=False, compare=False)

    def _focus_and_get_final_coordinates(self) -> tuple[int, int]:
        """Focus the window under the base point and return the point to click."""
        # Focus first: the offset callable is resolved only afterwards so that
        # it can depend on which application ended up focused.
        if self.app_actions:
            self.app_actions.focus_at(*self.base_coordinates)
        if self.click_offset_right:
            shift_x = self.click_offset_right()
        else:
            shift_x = 0
        base_x = self.base_coordinates[0]
        base_y = self.base_coordinates[1]
        return (base_x + shift_x, base_y)

    def _whitespace_adjustment_allowed(self, requested: bool) -> bool:
        """Return whether a requested whitespace skip may be performed now.

        Requires that shift is not held and that app_actions is available.
        """
        return bool(
            requested and not self.keyboard.is_shift_down() and self.app_actions
        )

    def move_mouse_cursor(self):
        """Focus the target window and move the mouse pointer to the click point."""
        self.mouse.move(self._focus_and_get_final_coordinates())

    def move_text_cursor(self):
        """Click at the resolved point, then adjust the text cursor by keyboard."""
        target = self._focus_and_get_final_coordinates()
        self.mouse.move(target)
        self.mouse.click()
        # Needed to avoid selection issues on Mac.
        time.sleep(0.01)

        if self.move_distance:
            press = (
                self.keyboard.right if self.move_cursor_right else self.keyboard.left
            )
            press(self.move_distance)

        if self._whitespace_adjustment_allowed(self.move_past_whitespace_left):
            preceding = self.app_actions.peek_left()
            # Step over the character on the left only when it is a real space
            # and not part of a line break. Google Docs reports a newline as a
            # newline followed by a space, so that pair is excluded as well.
            if len(preceding) >= 2:
                if preceding[-1] == " " and preceding[-2] != "\n":
                    self.keyboard.left(1)

        if self._whitespace_adjustment_allowed(self.move_past_whitespace_right):
            following = self.app_actions.peek_right()
            if following and following[0] == " ":
                self.keyboard.right(1)
def __init__(
    self,
    ocr_reader: Reader,
    eye_tracker,
    mouse,
    keyboard,
    app_actions=None,
    save_data_directory: Optional[str] = None,
    gaze_box_padding: int = 100,
    fallback_when_no_eye_tracker: EyeTrackerFallback = EyeTrackerFallback.MAIN_SCREEN,
):
    """Store the collaborators and set up background OCR state.

    Arguments:
    ocr_reader: Reader used for every OCR request.
    eye_tracker: Source of gaze points and gaze bounds; may be falsy.
    mouse: Mouse adapter handed to planned cursor locations.
    keyboard: Keyboard adapter handed to planned cursor locations.
    app_actions: Optional application hooks; None disables them.
    save_data_directory: Stored unchanged on the instance.
    gaze_box_padding: Margin added on each side of the gaze bounds when
        reading within a time range.
    fallback_when_no_eye_tracker: What to read when no gaze data is available.
    """
    # Input/output collaborators.
    self.ocr_reader = ocr_reader
    self.eye_tracker = eye_tracker
    self.mouse = mouse
    self.keyboard = keyboard
    self.app_actions = app_actions

    # Configuration.
    self.save_data_directory = save_data_directory
    self.gaze_box_padding = gaze_box_padding
    self.fallback_when_no_eye_tracker = fallback_when_no_eye_tracker

    # Background OCR state: a single worker, and no read requested yet.
    self._executor = futures.ThreadPoolExecutor(max_workers=1)
    self._future = None
    self._ocr_cache = OcrCache(
        ocr_reader, fallback_when_no_eye_tracker=fallback_when_no_eye_tracker
    )
def read_nearby(
    self,
    time_range: Optional[tuple[float, float]] = None,
) -> None:
    """Perform OCR nearby the gaze point in the current thread.

    The outcome is published through the future that latest_screen_contents()
    waits on. If the read raises, the exception is stored in that future and
    re-raised here, so a later latest_screen_contents() call reports the failure
    instead of blocking forever on a future that would never complete.

    Arguments:
    time_range: If specified, read within the bounds of gaze during that time.
    """
    future: futures.Future = futures.Future()
    self._future = future
    try:
        if time_range and time_range[0] and time_range[1]:
            start_timestamp, end_timestamp = time_range
            # Pad the range to account for timestamp inaccuracy.
            gaze_bounds = (
                self.eye_tracker.get_gaze_bounds_during_time_range(
                    start_timestamp - 0.5, end_timestamp + 0.5
                )
                if self.eye_tracker and self.eye_tracker.is_connected
                else None
            )
            if gaze_bounds:
                ocr_bounds = (
                    gaze_bounds.left - self.gaze_box_padding,
                    gaze_bounds.top - self.gaze_box_padding,
                    gaze_bounds.right + self.gaze_box_padding,
                    gaze_bounds.bottom + self.gaze_box_padding,
                )
            else:
                # No gaze data for the range: read without a bounding box.
                ocr_bounds = None
            contents = self._ocr_cache.read(
                (start_timestamp, end_timestamp), ocr_bounds
            )
        else:
            gaze_point = (
                self.eye_tracker.get_gaze_point()
                if self.eye_tracker and self.eye_tracker.is_connected
                else None
            )
            if gaze_point:
                contents = self.ocr_reader.read_nearby(gaze_point)
            elif self.fallback_when_no_eye_tracker == EyeTrackerFallback.ACTIVE_WINDOW:
                contents = self.ocr_reader.read_current_window()
            else:
                contents = self.ocr_reader.read_screen()
    except BaseException as error:
        # Complete the future so nothing can wait on it indefinitely, then let
        # the caller see the original failure.
        future.set_exception(error)
        raise
    future.set_result(contents)
def move_cursor_to_words_generator(
    self,
    words: str,
    disambiguate: bool,
    cursor_position: str = "middle",
    time_range: Optional[tuple[float, float]] = None,
    click_offset_right: Callable[[], int] | int = 0,
) -> Generator[Sequence[CursorLocation], CursorLocation, Optional[tuple[int, int]]]:
    """Generator form of move_cursor_to_words that supports disambiguation.

    Delegates the choice between candidates to _choose_cursor_location. Returns
    the base coordinates of the chosen location after moving the mouse there,
    or None when no location was chosen. See header comment for the
    next()/send() protocol.

    Raises:
    ValueError: cursor_position is not "before", "middle" or "after" and at
        least one match was found.
    """
    if time_range:
        self.read_nearby(time_range)
    contents = self.latest_screen_contents()
    word_matches = contents.find_matching_words(words)
    self._write_data(contents, words, word_matches)

    def target_point(match):
        # A match is a sequence of word locations; use its first/last word.
        if cursor_position == "before":
            return match[0].start_coordinates
        if cursor_position == "middle":
            mid_x = int((match[0].left + match[-1].right) / 2)
            mid_y = int((match[0].top + match[-1].bottom) / 2)
            return (mid_x, mid_y)
        if cursor_position == "after":
            return match[-1].end_coordinates
        raise ValueError(cursor_position)

    def as_cursor_location(match):
        point = target_point(match)
        # No keyboard adjustment is planned here: only the mouse is moved.
        return CursorLocation(
            base_coordinates=point,
            visual_coordinates=point,
            move_cursor_right=False,
            move_distance=0,
            move_past_whitespace_left=False,
            move_past_whitespace_right=False,
            text_height=match[0].height,
            mouse=self.mouse,
            keyboard=self.keyboard,
            app_actions=self.app_actions,
            click_offset_right=self._as_callable(click_offset_right),
        )

    candidates = [as_cursor_location(match) for match in word_matches]
    chosen = yield from self._choose_cursor_location(
        disambiguate=disambiguate,
        matches=candidates,
    )
    if not chosen:
        return None
    chosen.move_mouse_cursor()
    return chosen.base_coordinates
def move_text_cursor_to_words_generator(
    self,
    words: str,
    disambiguate: bool,
    cursor_position: str = "middle",
    filter_location_function: Optional[WordLocationsPredicate] = None,
    include_whitespace: bool = False,
    time_range: Optional[tuple[float, float]] = None,
    click_offset_right: Callable[[], int] | int = 0,
    hold_shift: bool = False,
    selection_position: Optional[SelectionPosition] = None,
) -> Generator[Sequence[CursorLocation], CursorLocation, Optional[CursorLocation]]:
    """Generator form of move_text_cursor_to_words that supports disambiguation.

    Returns the chosen CursorLocation after moving the text cursor to it, or
    None when no location was chosen. When hold_shift is true, shift is pressed
    around the move and always released afterwards. See header comment for the
    next()/send() protocol.
    """
    if time_range:
        self.read_nearby(time_range)
    contents = self.latest_screen_contents()
    word_matches = contents.find_matching_words(words)
    if filter_location_function:
        word_matches = [
            match for match in word_matches if filter_location_function(match)
        ]
    self._write_data(contents, words, word_matches)

    if not selection_position:
        # Guess the selection position: only a shift-move that lands before or
        # after the words extends a selection to that side.
        sides = self.SelectionPosition
        guesses = {"before": sides.LEFT, "after": sides.RIGHT}
        if hold_shift:
            selection_position = guesses.get(cursor_position, sides.NONE)
        else:
            selection_position = sides.NONE

    candidates = self._plan_cursor_locations(
        word_matches,
        cursor_position=cursor_position,
        include_whitespace=include_whitespace,
        click_offset_right=click_offset_right,
        selection_position=selection_position,
    )
    chosen = yield from self._choose_cursor_location(
        disambiguate=disambiguate,
        matches=candidates,
    )
    if not chosen:
        return None

    if not hold_shift:
        chosen.move_text_cursor()
        return chosen

    self.keyboard.shift_down()
    try:
        chosen.move_text_cursor()
    finally:
        # Release shift even if the move fails.
        self.keyboard.shift_up()
    return chosen
def move_text_cursor_to_longest_prefix_generator(
    self,
    words: str,
    disambiguate: bool,
    cursor_position: str = "middle",
    filter_location_function: Optional[WordLocationsPredicate] = None,
    time_range: Optional[tuple[float, float]] = None,
    click_offset_right: Callable[[], int] | int = 0,
    hold_shift: bool = False,
) -> Generator[
    Sequence[CursorLocation], CursorLocation, tuple[Optional[CursorLocation], int]
]:
    """Generator form of move_text_cursor_to_longest_prefix.

    Supports disambiguation; see header comment for the next()/send() protocol.
    Returns (location, prefix_length) after moving the text cursor, or
    (None, 0) when no location was chosen.
    """
    if time_range:
        self.read_nearby(time_range)
    contents = self.latest_screen_contents()
    found = contents.find_longest_matching_prefix(
        words, filter_location_function=filter_location_function
    )
    prefix_matches, prefix_length = found
    self._write_data(contents, words, prefix_matches)

    # Guess the selection position: a shift-move is treated as extending the
    # selection on the left.
    if hold_shift:
        selection_side = self.SelectionPosition.LEFT
    else:
        selection_side = self.SelectionPosition.NONE

    candidates = self._plan_cursor_locations(
        prefix_matches,
        cursor_position=cursor_position,
        include_whitespace=False,
        click_offset_right=click_offset_right,
        selection_position=selection_side,
    )
    chosen = yield from self._choose_cursor_location(
        disambiguate=disambiguate,
        matches=candidates,
    )
    if not chosen:
        return None, 0

    if not hold_shift:
        chosen.move_text_cursor()
    else:
        self.keyboard.shift_down()
        try:
            chosen.move_text_cursor()
        finally:
            # Release shift even if the move fails.
            self.keyboard.shift_up()
    return chosen, prefix_length
See move_text_cursor_to_words for argument details.""" 527 | return self._extract_result( 528 | self.move_text_cursor_to_longest_suffix_generator( 529 | words, 530 | disambiguate=False, 531 | cursor_position=cursor_position, 532 | filter_location_function=filter_location_function, 533 | time_range=time_range, 534 | click_offset_right=click_offset_right, 535 | hold_shift=hold_shift, 536 | ) 537 | ) 538 | 539 | def move_text_cursor_to_longest_suffix_generator( 540 | self, 541 | words: str, 542 | disambiguate: bool, 543 | cursor_position: str = "middle", 544 | filter_location_function: Optional[WordLocationsPredicate] = None, 545 | time_range: Optional[tuple[float, float]] = None, 546 | click_offset_right: Callable[[], int] | int = 0, 547 | hold_shift: bool = False, 548 | ) -> Generator[ 549 | Sequence[CursorLocation], CursorLocation, tuple[Optional[CursorLocation], int] 550 | ]: 551 | """Same as move_text_cursor_to_longest_suffix, except it supports 552 | disambiguation through a generator. See header comment for details.""" 553 | if time_range: 554 | self.read_nearby(time_range) 555 | screen_contents = self.latest_screen_contents() 556 | matches, suffix_length = screen_contents.find_longest_matching_suffix( 557 | words, filter_location_function=filter_location_function 558 | ) 559 | self._write_data(screen_contents, words, matches) 560 | # Guess the selection position. 
def move_text_cursor_to_difference_generator(
    self,
    words: str,
    disambiguate: bool,
    time_range: Optional[tuple[float, float]] = None,
    click_offset_right: Callable[[], int] | int = 0,
) -> Generator[Sequence[CursorLocation], CursorLocation, Optional[tuple[int, int]]]:
    """Move the text cursor to where onscreen text starts to differ from words.

    Finds onscreen text that matches the start and/or end of the provided words,
    and moves the text cursor to the start of where the words differ. Returns the
    start and end indices of the differing text in the provided words, or None
    when no location was chosen. Supports disambiguation; see header comment for
    the next()/send() protocol.

    Arguments:
    words: The replacement text to compare against onscreen text.
    disambiguate: Passed through to the cursor location chooser.
    time_range: If specified, read within the bounds of gaze during that time.
    click_offset_right: Adjust the X-coordinate when clicking.
    """
    if time_range:
        self.read_nearby(time_range)
    screen_contents = self.latest_screen_contents()
    prefix_matches, prefix_length = screen_contents.find_longest_matching_prefix(
        words
    )
    suffix_matches, suffix_length = screen_contents.find_longest_matching_suffix(
        words
    )
    matches = list(prefix_matches) + list(suffix_matches)
    self._write_data(screen_contents, words, matches)

    # Find any pairs of matches that are adjacent onscreen. Track whether there is
    # whitespace between the pairs. The two lists are kept index-aligned.
    adjacent_prefix_matches = []
    whitespace_between_matches_list = []
    for prefix_match in prefix_matches:
        for suffix_match in suffix_matches:
            if prefix_match[-1].is_adjacent_left_of(
                suffix_match[0], allow_whitespace=True
            ):
                adjacent_prefix_matches.append(prefix_match)
                # Adjacent only when whitespace is allowed means a gap exists.
                whitespace_between_matches_list.append(
                    not prefix_match[-1].is_adjacent_left_of(
                        suffix_match[0], allow_whitespace=False
                    )
                )

    if adjacent_prefix_matches:
        locations = self._plan_cursor_locations(
            adjacent_prefix_matches,
            cursor_position="after",
            include_whitespace=False,
            click_offset_right=click_offset_right,
            selection_position=self.SelectionPosition.NONE,
        )
        location = yield from self._choose_cursor_location(
            disambiguate=disambiguate,
            matches=locations,
        )
        if not location:
            return None
        location.move_text_cursor()

        whitespace_between_matches = whitespace_between_matches_list[
            locations.index(location)
        ]
        # Index of the character just before the matched suffix. It is negative
        # when the suffix covers the whole phrase; indexing with it used to
        # raise IndexError, so that case now falls through to the plain range.
        gap_index = len(words) - suffix_length - 1
        if (
            whitespace_between_matches
            and gap_index >= 0
            and words[gap_index] == " "
        ):
            # The separating space already exists onscreen, so leave it out of
            # the differing range.
            return (prefix_length, gap_index)
        return (prefix_length, len(words) - suffix_length)

    prefix_locations = self._plan_cursor_locations(
        prefix_matches,
        cursor_position="after",
        include_whitespace=False,
        click_offset_right=click_offset_right,
        selection_position=self.SelectionPosition.NONE,
    )
    suffix_locations = self._plan_cursor_locations(
        suffix_matches,
        cursor_position="before",
        include_whitespace=False,
        click_offset_right=click_offset_right,
        selection_position=self.SelectionPosition.NONE,
    )
    locations = [*prefix_locations, *suffix_locations]

    location = yield from self._choose_cursor_location(
        disambiguate=disambiguate,
        matches=locations,
    )
    if not location:
        return None
    location.move_text_cursor()

    if location in prefix_locations:
        # Only the prefix is onscreen: everything after it differs.
        return (prefix_length, len(words))
    assert location in suffix_locations
    # Only the suffix is onscreen: everything before it differs.
    return (0, len(words) - suffix_length)
def select_text_generator(
    self,
    start_words: str,
    disambiguate: bool,
    end_words: Optional[str] = None,
    for_deletion: bool = False,
    start_time_range: Optional[tuple[float, float]] = None,
    end_time_range: Optional[tuple[float, float]] = None,
    click_offset_right: Callable[[], int] | int = 0,
    after_start: bool = False,
    before_end: bool = False,
    select_pause_seconds: Callable[[], float] | float = 0.01,
) -> Generator[Sequence[CursorLocation], CursorLocation, Optional[CursorLocation]]:
    """Generator form of select_text that supports disambiguation.

    Places the text cursor at the start of the selection, pauses for
    select_pause_seconds, then shift-moves to the end of the selection. Returns
    the end CursorLocation, or None when no location was chosen. See header
    comment for the next()/send() protocol.
    """
    if start_time_range:
        self.read_nearby(start_time_range)
    contents = self.latest_screen_contents()
    start_matches = contents.find_matching_words(start_words)
    self._write_data(contents, start_words, start_matches)

    start_locations = self._plan_cursor_locations(
        start_matches,
        cursor_position="after" if after_start else "before",
        include_whitespace=for_deletion and not after_start,
        click_offset_right=click_offset_right,
        selection_position=self.SelectionPosition.LEFT,
    )
    start_location = yield from self._choose_cursor_location(
        disambiguate=disambiguate,
        matches=start_locations,
    )
    if not start_location:
        return None
    start_location.move_text_cursor()
    time.sleep(self._resolve_value(select_pause_seconds))

    end_cursor_position = "before" if before_end else "after"

    if not end_words:
        # Select until the end of the start_words match that was chosen.
        chosen_match = start_matches[start_locations.index(start_location)]
        end_location = self._plan_cursor_location(
            chosen_match,
            cursor_position=end_cursor_position,
            include_whitespace=False,
            click_offset_right=click_offset_right,
            selection_position=self.SelectionPosition.RIGHT,
        )
        self.keyboard.shift_down()
        try:
            end_location.move_text_cursor()
        finally:
            # Release shift even if the move fails.
            self.keyboard.shift_up()
        return end_location

    if end_time_range:
        self.read_nearby(end_time_range)
    else:
        self._read_nearby_if_gaze_moved()

    def forms_valid_selection(candidate):
        # Keep only end matches accepted by _is_valid_selection for this start.
        return self._is_valid_selection(
            start_location.base_coordinates, candidate[-1].end_coordinates
        )

    end_location = yield from self.move_text_cursor_to_words_generator(
        end_words,
        disambiguate=disambiguate,
        cursor_position=end_cursor_position,
        filter_location_function=forms_valid_selection,
        include_whitespace=False,
        click_offset_right=click_offset_right,
        hold_shift=True,
        selection_position=self.SelectionPosition.RIGHT,
    )
    return end_location
See header comment for details.""" 823 | if time_range: 824 | self.read_nearby(time_range) 825 | screen_contents = self.latest_screen_contents() 826 | prefix_matches, prefix_length = screen_contents.find_longest_matching_prefix( 827 | words 828 | ) 829 | before_prefix_locations = self._plan_cursor_locations( 830 | prefix_matches, 831 | cursor_position="before", 832 | include_whitespace=False, 833 | click_offset_right=click_offset_right, 834 | selection_position=self.SelectionPosition.LEFT, 835 | ) 836 | before_prefix_location = yield from self._choose_cursor_location( 837 | disambiguate=disambiguate, 838 | matches=before_prefix_locations, 839 | ) 840 | if before_prefix_location: 841 | before_prefix_location.move_text_cursor() 842 | time.sleep(self._resolve_value(select_pause_seconds)) 843 | if not time_range: 844 | self._read_nearby_if_gaze_moved() 845 | screen_contents = self.latest_screen_contents() 846 | if before_prefix_location: 847 | 848 | def filter_function(location): 849 | return self._is_valid_selection( 850 | before_prefix_location.base_coordinates, 851 | location[-1].end_coordinates, 852 | ) 853 | else: 854 | filter_function = None # type: ignore[assignment] 855 | suffix_matches, suffix_length = screen_contents.find_longest_matching_suffix( 856 | words, filter_location_function=filter_function 857 | ) 858 | after_suffix_locations = self._plan_cursor_locations( 859 | suffix_matches, 860 | cursor_position="after", 861 | include_whitespace=False, 862 | click_offset_right=click_offset_right, 863 | selection_position=self.SelectionPosition.RIGHT, 864 | ) 865 | after_suffix_location = yield from self._choose_cursor_location( 866 | disambiguate=disambiguate, 867 | matches=after_suffix_locations, 868 | ) 869 | if before_prefix_location and after_suffix_location: 870 | self.keyboard.shift_down() 871 | try: 872 | after_suffix_location.move_text_cursor() 873 | finally: 874 | self.keyboard.shift_up() 875 | return (prefix_length, len(words) - suffix_length) 876 | 
elif not before_prefix_location and not after_suffix_location: 877 | return None 878 | elif before_prefix_location: 879 | assert not after_suffix_location 880 | prefix_match = prefix_matches[ 881 | before_prefix_locations.index(before_prefix_location) 882 | ] 883 | after_prefix_location = self._plan_cursor_location( 884 | prefix_match, 885 | cursor_position="after", 886 | include_whitespace=False, 887 | click_offset_right=click_offset_right, 888 | selection_position=self.SelectionPosition.RIGHT, 889 | ) 890 | self.keyboard.shift_down() 891 | try: 892 | after_prefix_location.move_text_cursor() 893 | finally: 894 | self.keyboard.shift_up() 895 | return (0, prefix_length) 896 | else: 897 | assert after_suffix_location and not before_prefix_location 898 | suffix_match = suffix_matches[ 899 | after_suffix_locations.index(after_suffix_location) 900 | ] 901 | before_suffix_location = self._plan_cursor_location( 902 | suffix_match, 903 | cursor_position="before", 904 | include_whitespace=False, 905 | click_offset_right=click_offset_right, 906 | selection_position=self.SelectionPosition.LEFT, 907 | ) 908 | before_suffix_location.move_text_cursor() 909 | time.sleep(self._resolve_value(select_pause_seconds)) 910 | self.keyboard.shift_down() 911 | try: 912 | after_suffix_location.move_text_cursor() 913 | finally: 914 | self.keyboard.shift_up() 915 | return (len(words) - suffix_length, len(words)) 916 | 917 | def find_nearest_cursor_location( 918 | self, locations: Sequence[CursorLocation] 919 | ) -> Optional[CursorLocation]: 920 | """Returns the cursor location nearest to the current gaze point, if 921 | available.""" 922 | if not locations: 923 | return None 924 | contents = self.latest_screen_contents() 925 | reference_point = contents.screen_coordinates 926 | if not reference_point: 927 | if contents.bounding_box: 928 | # Use center of bounding box as reference point 929 | left, top, right, bottom = contents.bounding_box 930 | reference_point = ((left + right) // 2, (top + 
bottom) // 2) 931 | else: 932 | # "Nearest" is undefined. 933 | return None 934 | distance_to_words = [ 935 | ( 936 | _distance_squared(location.base_coordinates, reference_point), 937 | location, 938 | ) 939 | for location in locations 940 | ] 941 | return min(distance_to_words, key=lambda x: x[0])[1] 942 | 943 | def move_cursor_to_word_action(self): 944 | raise RuntimeError( 945 | "controller.move_cursor_to_word_action no longer supported. " 946 | "Use gaze_ocr.dragonfly.MoveCursorToWordAction instead." 947 | ) 948 | 949 | def move_text_cursor_action(self, word, cursor_position="middle"): 950 | """Return a dragonfly action for moving the text cursor nearby a word.""" 951 | raise RuntimeError( 952 | "controller.move_text_cursor_action no longer supported. " 953 | "Use gaze_ocr.dragonfly.MoveTextCursorAction instead." 954 | ) 955 | 956 | def select_text_action(self, start_word, end_word=None, for_deletion=False): 957 | """Return a Dragonfly action for selecting text.""" 958 | raise RuntimeError( 959 | "controller.select_text_action no longer supported. " 960 | "Use gaze_ocr.dragonfly.SelectTextAction instead." 
961 | ) 962 | 963 | def _read_nearby_if_gaze_moved(self): 964 | current_gaze = ( 965 | self.eye_tracker.get_gaze_point() 966 | if self.eye_tracker and self.eye_tracker.is_connected 967 | else None 968 | ) 969 | latest_screen_contents = self.latest_screen_contents() 970 | previous_gaze = latest_screen_contents.screen_coordinates 971 | threshold_squared = ( 972 | _squared(latest_screen_contents.search_radius / 2.0) 973 | if latest_screen_contents.search_radius 974 | else 0.0 975 | ) 976 | if ( 977 | current_gaze 978 | and previous_gaze 979 | and _distance_squared(current_gaze, previous_gaze) > threshold_squared 980 | ): 981 | self.read_nearby() 982 | 983 | def _plan_cursor_locations( 984 | self, 985 | matches: Sequence[Sequence[WordLocation]], 986 | cursor_position: str, 987 | include_whitespace: bool, 988 | click_offset_right: Callable[[], int] | int, 989 | selection_position: SelectionPosition, 990 | ) -> Sequence[CursorLocation]: 991 | return [ 992 | self._plan_cursor_location( 993 | match, 994 | cursor_position=cursor_position, 995 | include_whitespace=include_whitespace, 996 | click_offset_right=click_offset_right, 997 | selection_position=selection_position, 998 | ) 999 | for match in matches 1000 | ] 1001 | 1002 | def _plan_cursor_location( 1003 | self, 1004 | locations: Sequence[WordLocation], 1005 | cursor_position: str, 1006 | include_whitespace: bool, 1007 | click_offset_right: Callable[[], int] | int, 1008 | selection_position: SelectionPosition, 1009 | ) -> CursorLocation: 1010 | if cursor_position == "before": 1011 | distance_from_left = locations[0].left_char_offset 1012 | distance_from_right = locations[0].right_char_offset + len( 1013 | locations[0].text 1014 | ) 1015 | move_past_whitespace_left = include_whitespace and not distance_from_left 1016 | move_past_whitespace_right = False 1017 | return self._plan_cursor_movement( 1018 | start_coordinates=locations[0].start_coordinates, 1019 | end_coordinates=locations[0].end_coordinates, 1020 | 
click_offset_right=click_offset_right, 1021 | distance_from_left=distance_from_left, 1022 | distance_from_right=distance_from_right, 1023 | selection_position=selection_position, 1024 | move_past_whitespace_left=move_past_whitespace_left, 1025 | move_past_whitespace_right=move_past_whitespace_right, 1026 | text_height=locations[0].height, 1027 | ) 1028 | elif cursor_position == "middle": 1029 | # Note: if it's helpful, we could change this to position the cursor 1030 | # in the middle of the word. 1031 | coordinates = ( 1032 | int((locations[0].left + locations[-1].right) / 2), 1033 | int((locations[0].top + locations[-1].bottom) / 2), 1034 | ) 1035 | return CursorLocation( 1036 | base_coordinates=coordinates, 1037 | visual_coordinates=coordinates, 1038 | move_cursor_right=False, 1039 | move_distance=0, 1040 | move_past_whitespace_left=False, 1041 | move_past_whitespace_right=False, 1042 | text_height=locations[0].height, 1043 | mouse=self.mouse, 1044 | keyboard=self.keyboard, 1045 | app_actions=self.app_actions, 1046 | click_offset_right=self._as_callable(click_offset_right), 1047 | ) 1048 | else: 1049 | assert cursor_position == "after" 1050 | distance_from_right = locations[-1].right_char_offset 1051 | distance_from_left = locations[-1].left_char_offset + len( 1052 | locations[-1].text 1053 | ) 1054 | move_past_whitespace_left = False 1055 | move_past_whitespace_right = include_whitespace and not distance_from_right 1056 | return self._plan_cursor_movement( 1057 | start_coordinates=locations[-1].start_coordinates, 1058 | end_coordinates=locations[-1].end_coordinates, 1059 | click_offset_right=click_offset_right, 1060 | distance_from_left=distance_from_left, 1061 | distance_from_right=distance_from_right, 1062 | selection_position=selection_position, 1063 | move_past_whitespace_left=move_past_whitespace_left, 1064 | move_past_whitespace_right=move_past_whitespace_right, 1065 | text_height=locations[0].height, 1066 | ) 1067 | 1068 | def _plan_cursor_movement( 1069 
| self, 1070 | start_coordinates: tuple[int, int], 1071 | end_coordinates: tuple[int, int], 1072 | click_offset_right: Callable[[], int] | int, 1073 | distance_from_left: int, 1074 | distance_from_right: int, 1075 | selection_position: SelectionPosition, 1076 | move_past_whitespace_left: bool, 1077 | move_past_whitespace_right: bool, 1078 | text_height: int, 1079 | ) -> CursorLocation: 1080 | estimated_char_width = (end_coordinates[0] - start_coordinates[0]) / float( 1081 | distance_from_left + distance_from_right 1082 | ) 1083 | visual_coordinates = ( 1084 | int(start_coordinates[0] + distance_from_left * estimated_char_width), 1085 | int((start_coordinates[1] + end_coordinates[1]) / 2.0), 1086 | ) 1087 | # Determine whether to start from the left or the right. 1088 | if not distance_from_left: 1089 | start_from_left = True 1090 | elif not distance_from_right: 1091 | start_from_left = False 1092 | elif selection_position == self.SelectionPosition.RIGHT: 1093 | # Mac selection can only be reliably expanded outward. 1094 | start_from_left = True 1095 | elif selection_position == self.SelectionPosition.LEFT: 1096 | # Mac selection can only be reliably expanded outward. 1097 | start_from_left = False 1098 | elif distance_from_left <= distance_from_right: 1099 | start_from_left = True 1100 | else: 1101 | start_from_left = False 1102 | if start_from_left: 1103 | return CursorLocation( 1104 | base_coordinates=start_coordinates, 1105 | visual_coordinates=visual_coordinates, 1106 | move_cursor_right=True, 1107 | move_distance=distance_from_left, 1108 | move_past_whitespace_left=move_past_whitespace_left, 1109 | move_past_whitespace_right=move_past_whitespace_right, 1110 | text_height=text_height, 1111 | mouse=self.mouse, 1112 | keyboard=self.keyboard, 1113 | app_actions=self.app_actions, 1114 | click_offset_right=self._as_callable(click_offset_right), 1115 | ) 1116 | else: 1117 | # Start from the right. 
1118 | return CursorLocation( 1119 | base_coordinates=end_coordinates, 1120 | visual_coordinates=visual_coordinates, 1121 | move_cursor_right=False, 1122 | move_distance=distance_from_right, 1123 | move_past_whitespace_left=move_past_whitespace_left, 1124 | move_past_whitespace_right=move_past_whitespace_right, 1125 | text_height=text_height, 1126 | mouse=self.mouse, 1127 | keyboard=self.keyboard, 1128 | app_actions=self.app_actions, 1129 | click_offset_right=self._as_callable(click_offset_right), 1130 | ) 1131 | 1132 | def _choose_cursor_location( 1133 | self, 1134 | disambiguate: bool, 1135 | matches: Sequence[CursorLocation], 1136 | ) -> Generator[Sequence[CursorLocation], CursorLocation, Optional[CursorLocation]]: 1137 | if not matches: 1138 | return None 1139 | if len(matches) == 1: 1140 | return matches[0] 1141 | if disambiguate: 1142 | return (yield matches) 1143 | else: 1144 | return self.find_nearest_cursor_location(matches) 1145 | 1146 | @staticmethod 1147 | def _extract_result(generator): 1148 | """Extracts final return value from generator, assuming no values are generated.""" 1149 | try: 1150 | next(generator) 1151 | raise AssertionError() 1152 | except StopIteration as e: 1153 | return e.value 1154 | 1155 | def _write_data(self, screen_contents, word, word_locations): 1156 | if not self.save_data_directory: 1157 | return 1158 | if word_locations: 1159 | result = "multiple" if len(word_locations) > 1 else "success" 1160 | else: 1161 | result = "failure" 1162 | file_name_prefix = f"{result}_{time.time():.2f}" 1163 | file_path_prefix = os.path.join(self.save_data_directory, file_name_prefix) 1164 | if hasattr(screen_contents.screenshot, "save"): 1165 | screen_contents.screenshot.save(file_path_prefix + ".png") 1166 | else: 1167 | screen_contents.screenshot.write_file(file_path_prefix + ".png") 1168 | with open(file_path_prefix + ".txt", "w") as file: 1169 | file.write(word) 1170 | 1171 | def _is_valid_selection(self, start_coordinates, end_coordinates): 
1172 | epsilon = 5 # pixels 1173 | (start_x, start_y) = start_coordinates 1174 | (end_x, end_y) = end_coordinates 1175 | # Selection goes to previous line. 1176 | if end_y - start_y < -epsilon: 1177 | return False 1178 | # Selection stays on same line. 1179 | elif end_y - start_y < epsilon: 1180 | return end_x > start_x 1181 | # Selection moves to following line. 1182 | else: 1183 | return True 1184 | 1185 | 1186 | def _squared(x): 1187 | return x * x 1188 | 1189 | 1190 | def _distance_squared(coordinate1, coordinate2): 1191 | x_diff = coordinate1[0] - coordinate2[0] 1192 | y_diff = coordinate1[1] - coordinate2[1] 1193 | return _squared(x_diff) + _squared(y_diff) 1194 | --------------------------------------------------------------------------------