├── cli
├── __init__.py
├── completer.py
└── output.py
├── core
├── __init__.py
└── voice_system.py
├── .gitignore
├── commands
├── __init__.py
├── type_command.py
├── base.py
├── stop_command.py
├── click_command.py
├── read_command.py
├── command_processor.py
├── scrap_command.py
└── computer_command.py
├── requirements.txt
├── README.md
├── audio
└── vad.py
├── print_project.py
├── hotkey_listener.py
├── speech
└── whisper_processor.py
├── main.py
└── LICENSE
/cli/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
1 | """Core package for voice command system."""
2 |
3 | from .voice_system import VoiceCommandSystem
4 |
5 | __all__ = ['VoiceCommandSystem']
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | pyenv
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | *.so
7 | .Python
8 | build/
9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | wheels/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 |
24 | # Virtual Environment
25 | venv/
26 | ENV/
27 | env/
28 | .env
29 |
30 | # IDE
31 | .idea/
32 | .vscode/
33 | *.swp
34 | *.swo
35 | *.swn
36 | .*.kate-swp
37 |
38 | # OS
39 | .DS_Store
40 | Thumbs.db
41 |
--------------------------------------------------------------------------------
/commands/__init__.py:
--------------------------------------------------------------------------------
1 | # commands/__init__.py
2 | import pkgutil
3 | import inspect
4 | import importlib
5 | from .base import Command
6 |
7 | __all__ = ['Command']
8 |
# Discover every Command subclass exposed by this package's modules and
# re-export it from the package namespace.
for _, module_name, _ in pkgutil.iter_modules(__path__):
    if module_name in ('__init__', 'base'):
        continue
    # Import the sibling module relative to this package.
    module = importlib.import_module(f".{module_name}", package=__name__)
    for name, obj in inspect.getmembers(module, inspect.isclass):
        if not issubclass(obj, Command) or obj is Command:
            continue
        # A name listed in __all__ must exist as an attribute of the package,
        # otherwise `from commands import *` fails with AttributeError.
        globals()[name] = obj
        # The same class can be visible in several modules (e.g. imported by
        # one of them); list it only once.
        if name not in __all__:
            __all__.append(name)
18 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Core ML/AI Dependencies
2 | nemo_toolkit[asr]==2.2.1
3 | torch==2.6.0
4 | onnx==1.17.0
5 | transformers==4.48.3
6 | pytorch-lightning==2.5.1.post0
7 | cuda-python>=12.3
8 |
9 | # Audio Processing
10 | PyAudio==0.2.12
11 | webrtcvad-wheels==2.0.14
12 | soundfile==0.13.1
13 | pydub==0.25.1
14 | sox==1.5.0
15 | resampy==0.4.3
16 | # Note: libsora package removed - not available on PyPI
17 |
18 | # Screen/GUI Interaction
19 | PyAutoGUI==0.9.54
20 | pynput==1.7.7
21 | pytesseract==0.3.13
22 | pillow==11.1.0
23 | python3-Xlib==0.15
24 |
25 | # CLI and Output
26 | prompt_toolkit==3.0.51
27 | # espeak and xclip are system packages, not pip packages
28 |
29 | # Core dependencies
30 | numpy<2.0
31 | sentencepiece==0.2.0
32 | protobuf==3.20.3
33 | accelerate==1.3.0
34 | datasets==3.2.0
35 | safetensors==0.5.2
36 | sounddevice==0.5.1
37 | evdev==1.9.2
38 | pycairo==1.28.0
39 | texterrors==0.5.1
40 | kaldi-python-io==1.2.2
41 | wget==3.2
42 |
43 | # Additional dependencies that might be needed
44 | requests>=2.28.0
45 | aiohttp>=3.8.0
46 |
--------------------------------------------------------------------------------
/commands/type_command.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | from .base import Command
3 |
class TypeCommand(Command):
    """Command that types the given text through ``xdotool``."""

    def __init__(self):
        super().__init__(
            name="type",
            aliases=["type in"],
            description="Type text using keyboard",
            execute=self._execute,
        )

    async def _execute(self, text: str) -> str:
        """Type ``text`` with ``xdotool type`` and report the outcome.

        Args:
            text: The text to type. Its first character is upper-cased
                before typing.

        Returns:
            ``"Typed: '<text>'"`` on success, otherwise an error message
            describing why typing failed. Errors are reported through the
            return value rather than raised.
        """
        try:
            # The typed text always starts with a capital letter.
            if text and not text[0].isupper():
                text = text[0].upper() + text[1:]

            print(f"Typing text: '{text}'")
            # "--" ends option parsing, so text that begins with "-" is
            # typed literally instead of being read as an xdotool option.
            # Note: subprocess.run blocks until xdotool exits.
            subprocess.run(['xdotool', 'type', '--', text], check=True)
            return f"Typed: '{text}'"
        except subprocess.CalledProcessError as e:
            error_msg = f"Type command failed: {str(e)}"
            print(error_msg)
            return error_msg
        except Exception as e:
            # Covers e.g. xdotool not being installed (FileNotFoundError).
            error_msg = f"Unexpected typing error: {str(e)}"
            print(error_msg)
            return error_msg
31 |
--------------------------------------------------------------------------------
/commands/base.py:
--------------------------------------------------------------------------------
1 | # commands/base.py
2 | from dataclasses import dataclass, field
3 | from typing import Dict, Any, Callable, Optional, Awaitable # Added Awaitable
4 | from contextlib import asynccontextmanager
5 |
@dataclass(frozen=True)
class Command:
    """Base class for all commands.

    The dataclass is frozen, so the declared fields cannot be rebound after
    construction; the ``state`` dict itself stays mutable and is how the
    running flag is toggled.

    NOTE: ``frozen=True`` with the default ``eq=True`` makes the dataclass
    derive ``__hash__`` from the fields. ``aliases`` (list) and ``state``
    (dict) are unhashable, so ``hash(command)`` raises ``TypeError``.
    """
    # Primary trigger word of the command.
    name: str
    # Alternative trigger words.
    aliases: list[str]
    # Human-readable summary of what the command does.
    description: str
    # Async callable that receives the argument text. Implementations in this
    # project either return nothing or return a status string.
    execute: Callable[[str], Awaitable[Optional[str]]]
    # Flag to indicate this command should only match single-word inputs (no arguments)
    single_word_only: bool = False
    # Per-instance mutable state; each instance gets its own dict.
    state: Dict[str, bool] = field(default_factory=lambda: {'is_running': False})

    @property
    def is_active(self) -> bool:
        """Return True while the command is marked as running."""
        return self.state['is_running']

    @asynccontextmanager
    async def running(self):
        """Async context manager that marks the command as running.

        The flag is set on entry and always cleared on exit, including when
        the body raises.
        """
        self.state['is_running'] = True
        try:
            yield
        finally:
            self.state['is_running'] = False
33 |
--------------------------------------------------------------------------------
/commands/stop_command.py:
--------------------------------------------------------------------------------
1 | # commands/stop_command.py
2 | import subprocess
3 | import logging
4 | from .base import Command
5 | # <<< Import output functions >>>
6 | from cli.output import schedule_print # Only need print for this command
7 |
8 | logger = logging.getLogger(__name__)
9 |
class StopCommand(Command):
    """Command that silences text-to-speech by killing ``espeak`` processes."""

    def __init__(self):
        super().__init__(
            name="stop",
            aliases=["cancel", "shutup", "silence"],
            description="Stops any active text-to-speech feedback (espeak).",
            execute=self._execute,
        )

    async def _execute(self, args: str) -> None:
        """Run ``pkill -f espeak`` and print the outcome to the CLI.

        Nothing is spoken and ``args`` is ignored. Return code 0 is reported
        as speech stopped, 1 as nothing to stop, anything else as an error.
        """
        logger.info("Executing stop command...")
        try:
            outcome = subprocess.run(['pkill', '-f', 'espeak'], capture_output=True, check=False)
            code = outcome.returncode

            if code in (0, 1):
                status = "Stopped active speech." if code == 0 else "No active speech found to stop."
                logger.info(status)
                schedule_print("System", status)
                return

            stderr_text = outcome.stderr.decode('utf-8', errors='ignore').strip()
            failure = f"pkill command failed with code {code}: {stderr_text}"
            logger.error(failure)
            schedule_print("Error", f"Error trying to stop speech: {failure}")

        except FileNotFoundError:
            # The pkill executable itself is missing.
            failure = "Error: 'pkill' command not found. Cannot stop speech."
            logger.error(failure)
            schedule_print("Error", failure)
        except Exception as e:
            failure = f"Unexpected error stopping speech: {str(e)}"
            logger.error(failure, exc_info=True)
            schedule_print("Error", failure)
52 |
--------------------------------------------------------------------------------
/cli/completer.py:
--------------------------------------------------------------------------------
1 | # cli/completer.py
2 | from prompt_toolkit.completion import Completer, Completion
3 | from typing import Iterable, List, Set
4 |
5 | # This list will be updated by main.py
6 | ollama_models_for_completion: List[str] = ["mistral"]
7 |
class CLICompleter(Completer):
    """Prompt completer for command triggers and the ``select model`` syntax."""

    def __init__(self, command_processor):
        """
        Initialize the completer.
        Args:
            command_processor: The initialized CommandProcessor instance.
        """
        self.command_processor = command_processor
        # static_keywords must exist before _update_command_triggers runs,
        # because that method merges them into all_triggers.
        self.static_keywords = sorted(["select", "help", "exit", "quit", "refresh_models"])
        self.select_options = ["model"]
        self._update_command_triggers()

    def _update_command_triggers(self):
        """Rebuild the trigger sets from the processor's commands and aliases."""
        triggers: Set[str] = set()
        self.command_triggers = triggers
        processor = self.command_processor
        if processor:
            triggers.update(processor.commands.keys())
            for cmd in processor.commands.values():
                triggers.update(cmd.aliases)
        self.all_triggers = sorted(triggers.union(self.static_keywords))

    def get_completions(self, document, complete_event):
        """Yield completions for the text before the cursor.

        Without a space in the input, every trigger that starts with the
        current word is offered. After ``select`` the option ``model`` is
        offered, and after ``select model`` the known model names are.
        Any exception raised while completing is suppressed.
        """
        line = document.text_before_cursor.lstrip()
        tokens = line.split()
        fragment = document.get_word_before_cursor(WORD=True)

        try:
            # Top-level completion: nothing typed yet, or still on the first word.
            if ' ' not in line:
                for trigger in self.all_triggers:
                    if trigger.startswith(fragment):
                        yield Completion(trigger, start_position=-len(fragment))
                return

            # Contextual completion exists only for the 'select' command.
            if not tokens or tokens[0] != "select":
                return

            count = len(tokens)
            trailing_space = line.endswith(' ')

            if count == 1:
                if trailing_space:
                    for option in self.select_options:
                        yield Completion(option, start_position=0)
            elif count == 2:
                if not trailing_space:
                    # Still typing the option name.
                    option = self.select_options[0]
                    if option.startswith(fragment):
                        yield Completion(option, start_position=-len(fragment))
                elif tokens[1] == "model":
                    # Just after 'select model '.
                    for model in ollama_models_for_completion:
                        yield Completion(model, start_position=0)
            elif tokens[1] == "model":
                # Typing a model name.
                for model in ollama_models_for_completion:
                    if model.startswith(fragment):
                        yield Completion(model, start_position=-len(fragment))
            return
        except Exception:
            # Completer errors must not crash the application.
            pass
60 |
--------------------------------------------------------------------------------
/commands/click_command.py:
--------------------------------------------------------------------------------
1 | import pyautogui
2 | import pytesseract
3 | from .base import Command
4 |
class ClickCommand(Command):
    """Command that finds text on screen with OCR and clicks it."""

    def __init__(self):
        super().__init__(
            name="click",
            aliases=[],
            description="Click text or buttons on screen",
            execute=self._execute,
        )

    async def _execute(self, text: str) -> str:
        """Screenshot the screen, OCR it, and click the best match for ``text``.

        Each OCR word is compared case-insensitively with the search text.
        An exact match scales the OCR confidence by 1.2, a word contained in
        the search text scales it by 0.8, and a word containing the search
        text keeps it unchanged. The highest positive score wins.

        Returns:
            A message naming the clicked word and position,
            ``"Text not found on screen"``, or an error message.
        """
        try:
            print(f"Searching for text: '{text}'")
            screen_image = pyautogui.screenshot()

            # Tesseract settings chosen for better accuracy.
            ocr_data = pytesseract.image_to_data(
                screen_image,
                output_type=pytesseract.Output.DICT,
                config='--psm 11 --oem 3',
            )

            # Debug output: the first ten recognised words.
            print("\nOCR Results:")
            found_words = [
                f"'{word}' (confidence: {float(ocr_data['conf'][i]):.1f}%)"
                for i, word in enumerate(ocr_data['text'])
                if word.strip()
            ]
            if len(found_words) > 10:
                summary = ", ".join(found_words[:10]) + "..."
            else:
                summary = ", ".join(found_words)
            print("Detected words:", summary)

            needle = text.lower()
            best_match = None
            top_score = 0

            for i, word in enumerate(ocr_data['text']):
                candidate = word.strip().lower()
                if not candidate:
                    continue

                score = float(ocr_data['conf'][i])

                if needle == candidate:
                    match_type = "exact"
                    score *= 1.2
                elif needle in candidate:
                    match_type = "contains"
                elif candidate in needle:
                    match_type = "partial"
                    score *= 0.8
                else:
                    continue

                if score > top_score:
                    top_score = score
                    # Click target is the centre of the word's bounding box.
                    centre_x = ocr_data['left'][i] + ocr_data['width'][i] // 2
                    centre_y = ocr_data['top'][i] + ocr_data['height'][i] // 2
                    best_match = (centre_x, centre_y, word, match_type, score)

            if best_match is None:
                print("\nNo matching text found on screen")
                return "Text not found on screen"

            x, y, matched_word, match_type, conf = best_match
            print(f"\nBest match: '{matched_word}' ({match_type} match, confidence: {conf:.1f}%)")
            print(f"Clicking at position: ({x}, {y})")

            pyautogui.moveTo(x, y, duration=0.2)
            pyautogui.click()

            return f"Clicked '{matched_word}' at ({x}, {y})"

        except Exception as e:
            error_msg = f"Click command failed: {str(e)}"
            print(error_msg)
            return error_msg
87 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Voice Command System
2 |
3 | Control your computer with a command-line voice interface. Uses NVIDIA's Parakeet-TDT model for speech recognition and supports clicking UI elements, typing text, reading text aloud, interacting with a local LLM, screen capture with OCR, and more.
4 |
5 | ## Features
6 |
7 | * Speech recognition using **NVIDIA Parakeet-TDT 0.6B V2 via NeMo toolkit**, providing accurate transcription with punctuation and capitalization.
8 | * Click commands: Find and click text/buttons on screen using OCR.
9 | * Type commands: Type text using keyboard emulation.
10 | * Read commands: Read highlighted text aloud using text-to-speech.
11 | * Computer commands: Interact with your system (run shell commands, manage apps/windows, query about highlighted text) using a local LLM (Ollama).
12 | * Scrap command: Select a screen area, perform OCR, and copy the extracted text.
13 | * Stop command: Immediately halts any active text-to-speech playback.
14 | * Rolling buffer: Captures audio just before hotkey activation to avoid missed words.
15 | * Hotkey controls: Use keyboard shortcuts to trigger recording and interrupt actions.
16 |
17 | ## Installation on openSUSE Tumbleweed
18 |
19 | Follow these steps precisely to set up the project environment.
20 |
21 | ### Step 1: Install System Dependencies
22 |
23 | First, install `pyenv` for managing Python versions. Follow the official `pyenv` installation instructions. After that, install the necessary system packages for both building Python and running the application using `zypper`:
24 |
25 | ```bash
26 | sudo zypper install git-core gcc automake make zlib-devel libbz2-devel libopenssl-devel readline-devel sqlite3-devel xz-devel libffi-devel tk-devel xdotool espeak xclip tesseract-ocr pkill wmctrl ffmpeg gnome-screenshot
27 | ```
28 |
29 | ### Step 2: Install Correct Python Version
30 |
31 | The heavy dependencies like nemo_toolkit require a specific Python version for which pre-compiled packages (wheels) are available. We will use pyenv to install Python 3.11.
32 |
33 | ```bash
34 | # Install Python 3.11.10 (or latest 3.11.x)
35 | pyenv install 3.11.10
36 |
37 | # Create a dedicated virtual environment for the project
38 | pyenv virtualenv 3.11.10 voice-command-311
39 | ```
40 |
41 | ### Step 3: Set Up Project and Install Python Packages
42 |
43 | Now, clone the repository and use the pyenv virtual environment you just created.
44 |
45 | ```bash
46 | # Clone the repository (if you haven't already)
47 | git clone https://github.com/ruapotato/Voice-Command
48 | cd Voice-Command
49 |
50 | # Set the local python version for this directory
51 | pyenv local voice-command-311
52 |
53 | # Upgrade pip and install the required packages
54 | pip install --upgrade pip
55 | pip install -r requirements.txt
56 | ```
57 |
58 | ### Step 4: Local LLM Setup
59 |
60 | This project uses Ollama for the computer command.
61 |
62 | 1. Install Ollama from [ollama.com](https://ollama.com).
63 | 2. Pull your desired model. For example:
64 |
65 | ```bash
66 | ollama pull mistral
67 | ```
68 |
69 | ## Running the Application
70 |
71 | 1. **Ensure Ollama is running**: Before starting the app, make sure the Ollama service is active in the background if you intend to use the computer command.
72 |
73 | ```bash
74 | ollama serve
75 | ```
76 |
77 | 2. **Navigate and Run**: Open a new terminal and go to the project directory. The pyenv environment should activate automatically. Then, run the main script.
78 |
79 | ```bash
80 | cd /path/to/Voice-Command
81 | python main.py
82 | ```
83 |
84 | *Note: The first time you run it, NeMo will download the Parakeet model, which may take some time.*
85 |
86 | ## Keyboard Controls
87 |
88 | * **Record Voice**: Press and hold `Ctrl+Shift`
89 | * **Interrupt/Stop**: Press `Ctrl+C`
90 | * **Exit**: Type `exit` or `quit` at the prompt, or press `Ctrl+D`
91 |
92 | ## License
93 |
94 | GPL3 by David Hamner
95 |
--------------------------------------------------------------------------------
/cli/output.py:
--------------------------------------------------------------------------------
1 | # cli/output.py
2 | import asyncio
3 | from prompt_toolkit import print_formatted_text, HTML
4 | import subprocess
5 | import logging
6 | import re # Keep for potential future use? Or remove.
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 | print_queue = asyncio.Queue()
11 | ESPEAK_CONFIG = [] # Voice config
12 |
13 | # --- Speak Utility (using async subprocess for proper interruption) ---
14 |
async def speak(text: str):
    """
    Asynchronously speak ``text`` with espeak, honouring task cancellation.

    The caller decides IF text should be spoken; empty text is a no-op.
    Any espeak process that is already running is killed first. Playback is
    capped at 20 seconds, after which the espeak process is killed.

    Raises:
        asyncio.CancelledError: re-raised after killing espeak when the
            surrounding task is cancelled. All other failures are logged
            and swallowed.
    """
    if not text:
        return

    logger.debug(f"Attempting to speak: {text[:50]}...")
    # Drop backticks, turn double quotes into single quotes and semicolons
    # into full stops. No shell is involved (the text is passed as an argv
    # entry), so this is purely a clean-up of the spoken text.
    text_to_speak = text.replace('`', '').replace('"', "'").replace(';', '.')
    if not text_to_speak:
        return  # Nothing left after cleaning

    try:
        # Kill any existing espeak processes first
        subprocess.run(['pkill', '-f', 'espeak'], check=False, capture_output=True)

        # "--" ends option parsing: text that starts with "-" (for example a
        # bullet list) would otherwise be read by espeak as options.
        process = await asyncio.create_subprocess_exec(
            'espeak',
            *ESPEAK_CONFIG,
            '--',
            text_to_speak,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL
        )

        # Awaiting here lets task cancellation interrupt the wait.
        try:
            await asyncio.wait_for(process.wait(), timeout=20)
        except asyncio.TimeoutError:
            logger.warning(f"espeak command timed out for: {text_to_speak[:50]}...")
            # Kill the process if it times out, then reap it
            process.kill()
            await process.wait()

    except asyncio.CancelledError:
        logger.debug("Speech was cancelled by user interrupt.")
        # Kill any espeak processes
        subprocess.run(['pkill', '-f', 'espeak'], check=False)
        raise  # Re-raise to propagate cancellation
    except FileNotFoundError:
        logger.error("espeak command not found. Cannot speak.")
    except Exception as e:
        logger.error(f"Speech failed for '{text_to_speak[:50]}...': {e}")
60 |
61 | # --- Print Queue Logic (remains same) ---
async def safe_print(formatted_message: str):
    """Print a pre-formatted HTML message without disrupting the prompt.

    ``formatted_message`` is parsed as prompt_toolkit ``HTML`` markup. If it
    is not well-formed markup (for example it contains a bare ``<`` or
    ``&``), it is printed verbatim as plain text instead of raising, so one
    odd message cannot take down the caller.
    """
    # Local import keeps this function self-contained.
    from xml.parsers.expat import ExpatError

    try:
        renderable = HTML(formatted_message)
    except ExpatError:
        # Not parseable as markup; fall back to the raw string.
        renderable = formatted_message
    print_formatted_text(renderable)
65 |
async def print_consumer():
    """Drain ``print_queue`` forever, printing each message via ``safe_print``.

    Queue items are ``(message_type, message)`` tuples. An item whose type is
    ``None`` is the shutdown sentinel and ends the consumer.
    """
    while True:
        kind, body = await print_queue.get()
        if kind is None:
            # Sentinel: acknowledge it and stop consuming.
            print_queue.task_done()
            return

        tag = f"[{kind}]"
        # "Help" puts its body on a new line; every other type (known or
        # not) is rendered as "[type] message".
        # NOTE(review): all non-Help types currently render identically; the
        # per-type branches may have been meant to carry colour markup - confirm.
        if kind == "Help":
            rendered = f"{tag}\n{body}"
        else:
            rendered = f"{tag} {body}"

        await safe_print(rendered)
        print_queue.task_done()
84 |
def schedule_print(message_type: str, message: str):
    """Queue a message for the print consumer, or print it directly.

    When an event loop is running in the calling thread, the
    ``(message_type, message)`` tuple is put on ``print_queue`` through
    ``call_soon_threadsafe``. ``asyncio.get_running_loop`` raises
    ``RuntimeError`` when the current thread has no running loop; in that
    case the message is written with a plain ``print`` instead.
    """
    # None becomes an empty string; anything else is stringified.
    text = "" if message is None else str(message)
    try:
        asyncio.get_running_loop().call_soon_threadsafe(
            print_queue.put_nowait, (message_type, text)
        )
    except RuntimeError:
        # No running loop available here: print synchronously.
        print(f"[Fallback Print {message_type}] {text}")
95 |
--------------------------------------------------------------------------------
/audio/vad.py:
--------------------------------------------------------------------------------
1 | """Whisper-based speech recognition."""
2 | import torch
3 | from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4 | import logging
5 | import warnings
6 | import numpy as np
7 |
8 | logging.getLogger("transformers").setLevel(logging.ERROR)
9 | warnings.filterwarnings("ignore", category=FutureWarning)
10 |
class WhisperProcessor:
    """Wraps a Hugging Face Whisper pipeline for speech-to-text."""

    def __init__(self):
        print("Initializing Whisper processor...")
        self.setup_model()

    def setup_model(self):
        """Load the Whisper model and build the recognition pipeline.

        Uses ``cuda:0`` with float16 when CUDA is available, otherwise the
        CPU with float32. Any failure is printed and re-raised.
        """
        try:
            # Pick device and matching dtype.
            has_cuda = torch.cuda.is_available()
            self.device = "cuda:0" if has_cuda else "cpu"
            self.torch_dtype = torch.float16 if has_cuda else torch.float32
            print(f"Using device: {self.device}")

            # Load the model weights and move them to the device.
            model_id = "openai/whisper-large-v3"
            model = AutoModelForSpeechSeq2Seq.from_pretrained(
                model_id,
                torch_dtype=self.torch_dtype,
                low_cpu_mem_usage=True,
                use_safetensors=True,
            )
            model.to(self.device)

            # The processor supplies the tokenizer and feature extractor.
            processor = AutoProcessor.from_pretrained(model_id)

            decoding_options = {
                "language": "en",
                "task": "transcribe",
                "use_auth_token": None,
                "return_timestamps": False,
            }
            self.pipe = pipeline(
                "automatic-speech-recognition",
                model=model,
                tokenizer=processor.tokenizer,
                feature_extractor=processor.feature_extractor,
                torch_dtype=self.torch_dtype,
                device=self.device,
                model_kwargs=decoding_options,
                chunk_length_s=30,
                stride_length_s=5,
                batch_size=1,
                ignore_warning=True,
            )
            print("Whisper model initialized")

        except Exception as e:
            print(f"Error initializing Whisper: {e}")
            raise

    def _preprocess_audio(self, audio_data):
        """Return a float32, pre-emphasised, RMS-normalised, clipped copy.

        Steps: cast to float32, apply a 0.97 pre-emphasis filter, divide by
        the RMS (skipped when the RMS is zero), then clip to [-1.0, 1.0].
        Returns None if any step raises (for example on empty input).
        """
        try:
            # Debug: describe the incoming samples.
            print(f"Input audio - Shape: {audio_data.shape}, Type: {audio_data.dtype}, Range: [{audio_data.min()}, {audio_data.max()}]")

            samples = audio_data.astype(np.float32)

            # Pre-emphasis: y[0] = x[0], y[n] = x[n] - 0.97 * x[n-1].
            pre_emphasis = 0.97
            filtered_tail = samples[1:] - pre_emphasis * samples[:-1]
            emphasized = np.append(samples[0], filtered_tail)

            # RMS normalisation; silent input is left untouched.
            rms = np.sqrt(np.mean(np.square(emphasized)))
            scaled = emphasized / rms if rms > 0 else emphasized

            # Clip to prevent extreme values.
            normalized_audio = np.clip(scaled, -1.0, 1.0)

            # Debug: describe the processed samples.
            print(f"Processed audio - Shape: {normalized_audio.shape}, Range: [{normalized_audio.min():.3f}, {normalized_audio.max():.3f}]")

            return normalized_audio

        except Exception as e:
            print(f"Error preprocessing audio: {e}")
            return None

    async def transcribe(self, audio_data):
        """Transcribe ``audio_data`` and return the stripped text.

        The audio is preprocessed and passed to the pipeline with a sampling
        rate of 16000. Returns None when the input is None, preprocessing
        fails, or the pipeline raises.
        """
        try:
            if audio_data is None:
                print("Received empty audio data")
                return None

            prepared = self._preprocess_audio(audio_data)
            if prepared is None:
                return None

            payload = {"raw": prepared, "sampling_rate": 16000}
            generation_options = {
                "temperature": 0,  # Deterministic decoding
                "compression_ratio_threshold": 2.4,
                "logprob_threshold": -1.0,
                "no_speech_threshold": 0.6,
            }

            print("Processing audio segment...")
            result = self.pipe(
                payload,
                batch_size=1,
                generate_kwargs=generation_options,
            )

            transcribed_text = result["text"].strip()
            print(f"Transcribed: {transcribed_text}")
            return transcribed_text

        except Exception as e:
            print(f"Error processing audio: {e}")
            return None
134 |
--------------------------------------------------------------------------------
/commands/read_command.py:
--------------------------------------------------------------------------------
1 | # commands/read_command.py
2 | import subprocess
3 | import asyncio
4 | import logging
5 | from typing import Optional
6 | from .base import Command
7 | import re
8 |
9 | # Add logger instance
10 | logger = logging.getLogger(__name__)
11 |
12 |
class ReadCommand(Command):
    """Command that reads the currently highlighted text aloud with espeak."""

    def __init__(self):
        super().__init__(
            name="read",
            aliases=["reed", "red", "three"],  # Consider removing "three" if it's a misrecognition
            description="Read highlighted text aloud",
            execute=self._execute,
            single_word_only=True,  # Match only the bare word, not "read the readme..."
        )
        # Extra espeak command-line arguments; empty by default.
        self.espeak_config = []
        # Warn early if espeak is not installed.
        try:
            subprocess.run(['which', 'espeak'], check=True, capture_output=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            logger.error("'espeak' command not found. Read command will not work.")

    async def _execute(self, text: str) -> Optional[str]:
        """Fetch the primary X selection with xclip and speak it.

        Non-ASCII characters are dropped and whitespace runs are collapsed
        to single spaces before speaking. ``text`` is not used.

        Returns:
            A status or error message for the CLI.
        """
        try:
            # Get highlighted text using xclip
            clip = subprocess.run(
                ['xclip', '-o', '-selection', 'primary'],
                capture_output=True, check=False, timeout=10
            )
            if clip.returncode != 0:
                stderr_output = clip.stderr.decode('utf-8', errors='ignore')
                error_msg = "Failed to get highlighted text."
                if "Error: Can't open display" in stderr_output:
                    error_msg += " (Cannot open display)"
                elif "Error: target STRING not available" in stderr_output:
                    error_msg = "No text is highlighted (or not available as STRING)."
                else:
                    error_msg += f" (xclip error: {stderr_output.strip()})"
                logger.warning(error_msg)
                return error_msg

            highlighted = clip.stdout.decode('utf-8', errors='ignore').strip()
            if not highlighted:
                message = "No text is highlighted."
                logger.info(message)
                await self._speak(message)
                return message

            # Drop emojis and other non-ASCII characters, then collapse
            # whitespace to prevent long pauses.
            ascii_only = highlighted.encode('ascii', 'ignore').decode('ascii')
            sanitized_text = re.sub(r'\s+', ' ', ascii_only).strip()

            if not sanitized_text:
                message = "No speakable text found after removing special characters."
                logger.info(message)
                # Returned to the CLI only; this message is not spoken.
                return message

            logger.info(f"Reading sanitized text (length: {len(sanitized_text)})...")
            await self._speak(sanitized_text)
            return f"Finished reading highlighted text ({len(sanitized_text)} chars)."

        except FileNotFoundError:
            error_msg = "Error: 'xclip' command not found. Cannot read highlighted text."
            logger.error(error_msg)
            return error_msg
        except subprocess.TimeoutExpired:
            error_msg = "Error: 'xclip' command timed out."
            logger.error(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Read command failed: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return error_msg

    async def _speak(self, text: str) -> None:
        """Pipe ``text`` to espeak on stdin and wait for it to finish.

        Existing espeak processes are killed first. On task cancellation
        espeak is killed and the cancellation is re-raised; other errors are
        logged and swallowed.
        """
        if not text:
            return
        try:
            # Kill any existing espeak processes first
            subprocess.run(['pkill', '-f', 'espeak'], check=False)

            argv = ['espeak'] + self.espeak_config
            logger.debug(f"Executing internal speak for text length: {len(text)}")

            # Async subprocess so the wait below can be cancelled.
            process = await asyncio.create_subprocess_exec(
                *argv,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL
            )

            # Feed the text on stdin and wait for espeak to exit.
            await process.communicate(input=text.encode('utf-8'))

        except asyncio.CancelledError:
            logger.debug("Speech was cancelled by user interrupt.")
            # Kill the espeak process if it's still running
            subprocess.run(['pkill', '-f', 'espeak'], check=False)
            raise  # Re-raise to propagate cancellation
        except FileNotFoundError:
            logger.error("Internal speak failed: 'espeak' command not found.")
        except Exception as e:
            logger.error(f"Unexpected internal speech error: {str(e)}")
121 |
--------------------------------------------------------------------------------
/print_project.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Print all project files in a nicely formatted way and copy to clipboard"""
3 | from pathlib import Path
4 | import sys
5 | from typing import List, Set
6 | import io
7 | import subprocess
8 | from shutil import which
9 |
# Layout of a section heading: a leading newline, the title, its underline.
HEADER_TEMPLATE = """
{title}
{underline}
"""

# Layout of one dumped file: its path, a separator rule, then the contents.
FILE_TEMPLATE = """
File: {filename}
{separator}
{content}
"""

# Files to ignore
# NOTE: should_process() tests each entry as a substring of the whole path
# string, so an entry matches directory names, file names and extensions alike.
IGNORE_PATTERNS = {
    '__pycache__',
    '.git',
    '.pyc',
    '.env',
    'pyenv',
    '.vscode',
    '.idea'
}
31 |
def should_process(path: Path) -> bool:
    """Return True unless the path's string form contains an ignore pattern."""
    path_text = str(path)
    for pattern in IGNORE_PATTERNS:
        if pattern in path_text:
            return False
    return True
35 |
def print_header(title: str, char: str = "=") -> str:
    """Print ``title`` underlined with ``char`` and return the rendered text.

    The underline repeats ``char`` once per character of ``title``.
    """
    underline = char * len(title)
    rendered = HEADER_TEMPLATE.format(title=title, underline=underline)
    print(rendered)
    return rendered
44 |
def print_file_content(file_path: Path) -> str:
    """Print one file's content with a header block and return what was printed.

    Args:
        file_path: File to read.

    Returns:
        The formatted block on success. If the file cannot be read or
        decoded, the error message that was written to stderr instead.
    """
    try:
        # Decode as UTF-8 explicitly: the default depends on the platform
        # locale, and source files with non-ASCII characters (such as the
        # tree-drawing glyphs in this script) fail or garble elsewhere.
        content = file_path.read_text(encoding="utf-8")
        formatted = FILE_TEMPLATE.format(
            filename=file_path,
            separator="-" * 80,
            content=content
        )
        print(formatted)
        return formatted
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole dump.
        error_msg = f"Error reading {file_path}: {e}"
        print(error_msg, file=sys.stderr)
        return error_msg
60 |
def find_all_files(directory: Path) -> List[Path]:
    """Collect every non-ignored file below ``directory``, sorted by path.

    A failure while listing a directory is reported on stderr; whatever was
    gathered before the failure is still returned.
    """
    collected: List[Path] = []
    try:
        for entry in directory.iterdir():
            if should_process(entry):
                if entry.is_file():
                    collected.append(entry)
                elif entry.is_dir():
                    # Descend and merge the sub-directory's files.
                    collected += find_all_files(entry)
    except Exception as e:
        print(f"Error accessing {directory}: {e}", file=sys.stderr)

    collected.sort()
    return collected
77 |
def print_directory_structure(directory: Path, prefix: str = "") -> str:
    """Print ``directory`` as a tree and return the same text.

    Args:
        directory: Directory whose entries are listed (recursively).
        prefix: Text prepended to every line; used by the recursion to draw
            the guides of the enclosing levels.

    Returns:
        The printed lines joined with newlines. If listing fails, the error
        message (also written to stderr) is included in the result.
    """
    output = []
    try:
        # Drop ignored entries *before* enumerating, so that "last" means the
        # last entry that is actually drawn. Otherwise an ignored entry that
        # sorts last leaves the tree without a closing connector.
        items = [item for item in sorted(directory.iterdir()) if should_process(item)]
        last_index = len(items) - 1
        for i, item in enumerate(items):
            is_last = i == last_index
            current_prefix = "└── " if is_last else "├── "
            # Continuation prefixes are four columns wide so that children
            # line up under the four-character connectors above.
            next_prefix = "    " if is_last else "│   "

            line = f"{prefix}{current_prefix}{item.name}"
            print(line)
            output.append(line)

            if item.is_dir():
                dir_output = print_directory_structure(item, prefix + next_prefix)
                # An empty sub-directory yields "", which would otherwise
                # show up as a blank line in the joined result.
                if dir_output:
                    output.append(dir_output)
    except Exception as e:
        error_msg = f"Error accessing {directory}: {e}"
        print(error_msg, file=sys.stderr)
        output.append(error_msg)

    return "\n".join(output)
104 |
def copy_to_clipboard(text: str) -> bool:
    """Put ``text`` on the system clipboard using the first available tool.

    Returns True when a tool accepted the text, False when no tool was found
    or the copy failed (the reason is written to stderr).
    """
    # Command-line tools in priority order: macOS, X11, then Wayland.
    tool_commands = (
        ('pbcopy', ['pbcopy']),
        ('xclip', ['xclip', '-selection', 'clipboard']),
        ('wl-copy', ['wl-copy']),
    )
    try:
        for tool, argv in tool_commands:
            if which(tool):
                subprocess.run(argv, input=text.encode('utf-8'), check=True)
                return True

        # Windows has none of the tools above; fall back to pyperclip there.
        if sys.platform == 'win32':
            import pyperclip
            pyperclip.copy(text)
            return True

        print("Could not find a suitable clipboard tool. Consider installing pyperclip.", file=sys.stderr)
        return False
    except Exception as e:
        print(f"Failed to copy to clipboard: {e}", file=sys.stderr)
        return False
131 |
def main():
    """Print the project tree and selected files, then copy it all to the clipboard.

    Everything the helpers print is first captured in memory, then echoed to
    the terminal in one piece and handed to copy_to_clipboard().
    """
    # Use StringIO to capture all output
    output_buffer = io.StringIO()
    original_stdout = sys.stdout
    sys.stdout = output_buffer
    try:
        project_root = Path(__file__).parent

        print_header("Project Structure")
        print(f"Root: {project_root}")
        print_directory_structure(project_root)
        print()

        print_header("Project Files")
        for file_path in find_all_files(project_root):
            if file_path.suffix in ['.py', '.txt', '.md', '.json', '.yaml', '.yml']:
                print_file_content(file_path)
    finally:
        # Always hand the real stdout back, even if a helper raised, so the
        # process is not left printing into the in-memory buffer.
        sys.stdout = original_stdout

    full_output = output_buffer.getvalue()

    # Print the output to the terminal
    print(full_output)

    # Copy to clipboard
    if copy_to_clipboard(full_output):
        print("\nProject structure and files have been copied to clipboard!")
    else:
        print("\nFailed to copy to clipboard. You may need to install a clipboard package.")
        print("For Python, you can use: pip install pyperclip")
164 |
# Script entry point: run main() and turn any failure into exit status 1.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C while running: say so on stderr instead of showing a traceback.
        print("\nPrinting interrupted.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Any other error is reported on stderr as a single line.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
174 |
--------------------------------------------------------------------------------
/commands/command_processor.py:
--------------------------------------------------------------------------------
1 | # commands/command_processor.py
2 | import importlib
3 | import inspect
4 | import logging
5 | import pkgutil
6 | from pathlib import Path
7 | from typing import AsyncGenerator, Optional, Tuple, Dict, List
8 | from difflib import SequenceMatcher
9 |
10 | from .base import Command
11 |
12 | logger = logging.getLogger(__name__)
13 |
class CommandProcessor:
    """Registry that discovers Command subclasses and routes text to them."""

    def __init__(self):
        """Create the registry and fill it through dynamic discovery."""
        self.commands: Dict[str, Command] = {}
        self.all_triggers_cache: List[str] = []
        self._discover_commands()
        logger.info(f"Command processor dynamically loaded commands: {list(self.commands.keys())}")

    def _discover_commands(self):
        """Import every module of the commands package and register its commands.

        A module that fails to import, or a class that fails to instantiate,
        is logged and skipped; discovery continues with the rest.
        """
        package_dir = Path(__file__).parent
        logger.debug(f"Discovering commands in: {package_dir}")
        skipped_modules = ('__init__', 'base')
        for _finder, module_name, _is_pkg in pkgutil.iter_modules([str(package_dir)]):
            if module_name in skipped_modules:
                continue
            full_module_name = f"commands.{module_name}"
            logger.debug(f"Attempting to import module: {full_module_name}")
            try:
                module = importlib.import_module(full_module_name)
                for _name, candidate in inspect.getmembers(module, inspect.isclass):
                    # Only concrete Command subclasses defined in this very
                    # module count; classes it merely imported do not.
                    is_local_command = (
                        candidate.__module__ == full_module_name
                        and issubclass(candidate, Command)
                        and candidate is not Command
                    )
                    if not is_local_command:
                        continue
                    try:
                        instance = candidate()
                        if instance.name in self.commands:
                            logger.warning(f"Duplicate command name '{instance.name}'. Overwriting.")
                        self.commands[instance.name] = instance
                    except Exception as inst_e:
                        logger.error(f"Failed to instantiate command {candidate.__name__} from {full_module_name}: {inst_e}", exc_info=True)
            except Exception as import_e:
                logger.error(f"Failed to import/process module {full_module_name}: {import_e}", exc_info=True)
        self._cache_all_triggers()

    def _cache_all_triggers(self):
        """Rebuild the trigger list (names plus aliases), longest first."""
        triggers = set(self.commands.keys())
        for cmd in self.commands.values():
            triggers.update(cmd.aliases)
        # Longest first, so a longer trigger wins over a shorter one that is
        # a prefix of it when parse_command() scans the list.
        self.all_triggers_cache = sorted(triggers, key=len, reverse=True)

    def get_command_details(self) -> List[Tuple[str, List[str], str]]:
        """Return (name, aliases, description) for every command, sorted by name."""
        ordered = sorted(self.commands.values(), key=lambda c: c.name)
        return [
            (cmd.name, cmd.aliases, getattr(cmd, 'description', 'No description.'))
            for cmd in ordered
        ]

    def _get_command_name_for_trigger(self, trigger: str) -> Optional[str]:
        """Map a trigger (command name or alias) to its command name, or None."""
        if trigger in self.commands:
            return trigger
        return next(
            (name for name, cmd in self.commands.items() if trigger in cmd.aliases),
            None,
        )

    def parse_command(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """Split ``text`` into a command name and its argument string.

        Prefix matching against all triggers is tried first; if that finds
        nothing and the text is a single word, the word is fuzzy-matched
        against the single-word triggers.

        Returns:
            ``(command_name, args)`` on a match (``args`` is ``""`` for an
            exact or fuzzy match), otherwise ``(None, None)``.
        """
        original = text.strip()
        lowered = original.lower()

        # Pass 1: prefix matching, which also covers commands with arguments.
        for trigger in self.all_triggers_cache:
            if not lowered.startswith(trigger):
                continue
            trigger_len = len(trigger)

            # The whole text is the trigger.
            if len(lowered) == trigger_len:
                return self._get_command_name_for_trigger(trigger), ""

            # The trigger must end at a separator, not in the middle of a word.
            if lowered[trigger_len] not in ' ,.!?':
                continue
            command_name = self._get_command_name_for_trigger(trigger)
            if not command_name:
                continue
            command = self.commands.get(command_name)
            if command and getattr(command, 'single_word_only', False):
                # Single-word-only commands never take arguments.
                continue
            return command_name, original[trigger_len:].lstrip(' ,.!?')

        # Pass 2: fuzzy matching of a lone word, to absorb speech-to-text typos.
        words = lowered.split()
        if len(words) != 1:
            return None, None

        candidate = words[0].rstrip('.,!?')
        best_trigger = None
        best_score = 0.85  # Minimum similarity threshold
        for trigger in self.all_triggers_cache:
            if " " in trigger:
                continue  # Only single-word triggers take part.
            score = SequenceMatcher(None, candidate, trigger).ratio()
            if score > best_score:
                best_score, best_trigger = score, trigger

        if not best_trigger:
            return None, None

        command_name = self._get_command_name_for_trigger(best_trigger)
        logger.debug(f"Fuzzy matched '{candidate}' to '{best_trigger}' with similarity {best_score:.2f}")
        return command_name, ""

    async def process_command(self, text: str) -> AsyncGenerator[str, None]:
        """Run the command found in ``text`` and yield its status messages.

        Async-generator, coroutine and plain synchronous ``execute``
        callables are all supported. An exception raised by the command is
        logged and yielded as an error message instead of propagating.
        """
        command_name, args = self.parse_command(text)
        if not command_name:
            logger.warning(f"process_command called with unparseable text: {text}")
            yield f"Unknown command or query format: {text}"
            return

        command = self.commands.get(command_name)
        if not command:
            yield f"Internal error: Command '{command_name}' parsed but not found."
            return

        try:
            execute_method = command.execute
            if inspect.isasyncgenfunction(execute_method):
                # Streaming command: forward each part as it arrives.
                async for part in execute_method(args):
                    yield part
                return

            if inspect.iscoroutinefunction(execute_method):
                outcome = await execute_method(args)
            else:
                logger.warning(f"Command '{command_name}' execute method is synchronous.")
                outcome = execute_method(args)
            if outcome:
                yield outcome
        except Exception as e:
            error_msg = f"Command '{command_name}' execution failed: {str(e)}"
            logger.error(error_msg, exc_info=True)
            yield error_msg
150 |
--------------------------------------------------------------------------------
/hotkey_listener.py:
--------------------------------------------------------------------------------
1 | # hotkey_listener.py
2 | import threading
3 | import asyncio
4 | import logging
5 | import subprocess
6 | import time
7 | from pynput import keyboard
8 | from typing import Callable, Optional, Any
9 |
# --- Globals for Hotkey State ---
# Updated by on_press()/on_release() as keys go down and up.
ctrl_pressed = False
shift_pressed = False
# True from the moment Ctrl+Shift started a recording until both keys are released.
recording_key_pressed = False
# True while a Ctrl+C press has already been handled, so it fires only once.
ctrl_c_combo_pressed = False

logger = logging.getLogger(__name__)
# Only warnings and errors from pynput itself are logged.
logging.getLogger("pynput").setLevel(logging.WARNING)

# --- References (set during initialization) ---
# All four are assigned by start_listener(); the callbacks do nothing until then.
voice_system_ref = None
print_scheduler_ref = None
main_loop_ref = None
current_task_accessor = None
24 |
def _interrupt_current_action():
    """Cancel the running command task and silence espeak.

    A "System" notice is scheduled on the main loop when either the task was
    cancelled or pkill actually stopped an espeak process.
    """
    if not (current_task_accessor and print_scheduler_ref and main_loop_ref):
        logger.warning("Cannot interrupt: Missing references.")
        return

    # Step 1: cancel the command task, if one is still running.
    task_cancelled = False
    try:
        running_task = current_task_accessor()
        if running_task and not running_task.done():
            logger.debug("Interrupt requested: Cancelling current command task.")
            running_task.cancel()
            task_cancelled = True
        else:
            logger.debug("Interrupt requested: No active/cancellable command task found.")
    except Exception as e:
        logger.error(f"Error accessing/cancelling current task: {e}")

    # Step 2: stop speech output. pkill exits with 0 when it killed
    # something and with 1 when nothing matched.
    speech_stopped = False
    try:
        logger.debug("Interrupt requested: Stopping any active espeak process via pkill.")
        pkill = subprocess.run(['pkill', '-f', 'espeak'], capture_output=True, check=False, timeout=1)
        exit_code = pkill.returncode
        if exit_code == 0:
            logger.debug("pkill stopped espeak process(es).")
            speech_stopped = True
        elif exit_code == 1:
            logger.debug("pkill found no espeak process to stop.")
        else:
            stderr_msg = pkill.stderr.decode(errors='ignore').strip()
            logger.warning(f"pkill command failed for espeak (code {exit_code}): {stderr_msg}")
    except FileNotFoundError:
        logger.error("Cannot stop speech: 'pkill' command not found.")
    except subprocess.TimeoutExpired:
        logger.warning("pkill command timed out while trying to stop espeak.")
    except Exception as e:
        logger.error(f"Error running pkill for espeak: {e}")

    # Step 3: tell the user, but only if something was actually interrupted.
    if task_cancelled or speech_stopped:
        main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Interrupted by user (Ctrl+C).")
64 |
def on_press(key):
    """Key-down callback: track modifiers and fire the two hotkeys.

    Ctrl+Shift starts a recording; Ctrl+C interrupts the current action.
    Each combo fires once per press, guarded by its module-level flag.
    """
    global ctrl_pressed, shift_pressed, recording_key_pressed, ctrl_c_combo_pressed
    if not (voice_system_ref and print_scheduler_ref and main_loop_ref and current_task_accessor):
        return

    try:
        # Track modifier state first.
        if key in (keyboard.Key.ctrl_l, keyboard.Key.ctrl_r):
            ctrl_pressed = True
        elif key in (keyboard.Key.shift, keyboard.Key.shift_r):
            shift_pressed = True
        pressed_c = hasattr(key, 'char') and key.char == 'c'

        # --- Recording Hotkey Logic (Ctrl + Shift) ---
        if ctrl_pressed and shift_pressed and not recording_key_pressed:
            logger.debug("Ctrl+Shift pressed, scheduling recording start.")
            recording_key_pressed = True

            def _launch_recording():
                # Runs on the main loop; the recorder itself goes to the
                # loop's default executor.
                main_loop_ref.run_in_executor(None, voice_system_ref.start_quick_record)

            main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Recording started...")
            main_loop_ref.call_soon_threadsafe(_launch_recording)
            return

        # --- Interruption Hotkey Logic (Ctrl+C) ---
        if ctrl_pressed and pressed_c and not ctrl_c_combo_pressed:
            logger.debug("Ctrl+C pressed, scheduling interruption.")
            ctrl_c_combo_pressed = True
            main_loop_ref.call_soon_threadsafe(_interrupt_current_action)
            return

    except Exception as e:
        logger.error(f"Error in hotkey on_press callback: {e}", exc_info=True)
        if print_scheduler_ref and main_loop_ref:
            main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", f"Hotkey press error: {e}")
103 |
def on_release(key):
    """Key-up callback: clear modifier state and finish hotkey combos.

    A recording started with Ctrl+Shift is stopped once *both* keys are up.
    The Ctrl+C guard flag is reset when either Ctrl or C is released.
    """
    global ctrl_pressed, shift_pressed, recording_key_pressed, ctrl_c_combo_pressed
    if not (voice_system_ref and print_scheduler_ref and main_loop_ref):
        return

    try:
        released_ctrl = key in (keyboard.Key.ctrl_l, keyboard.Key.ctrl_r)
        released_shift = key in (keyboard.Key.shift, keyboard.Key.shift_r)
        released_c = hasattr(key, 'char') and key.char == 'c'

        # Update modifier state before evaluating any combo.
        if released_ctrl:
            ctrl_pressed = False
        elif released_shift:
            shift_pressed = False

        # Stop recording only when we were recording and neither modifier
        # is held any longer.
        combo_fully_released = not (ctrl_pressed or shift_pressed)
        if recording_key_pressed and combo_fully_released:
            logger.debug("Ctrl+Shift combo fully released, scheduling recording stop.")
            recording_key_pressed = False  # Reset state immediately

            # This small delay gives slow applications time to process the key-up event
            time.sleep(0.1)

            def _launch_stop():
                # Runs on the main loop; the stop routine goes to the
                # loop's default executor.
                main_loop_ref.run_in_executor(None, voice_system_ref.stop_quick_record)

            main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Recording stopped. Processing...")
            main_loop_ref.call_soon_threadsafe(_launch_stop)

        # --- Interruption Hotkey Release Logic ---
        if ctrl_c_combo_pressed and (released_ctrl or released_c):
            logger.debug(f"Ctrl+C combo key released ({key}). Resetting combo flag.")
            ctrl_c_combo_pressed = False

    except Exception as e:
        logger.error(f"Error in hotkey on_release callback: {e}", exc_info=True)
        if print_scheduler_ref and main_loop_ref:
            main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", f"Hotkey release error: {e}")
147 |
def start_listener(loop: asyncio.AbstractEventLoop,
                   voice_system_instance: Any,
                   print_scheduler: Callable,
                   task_accessor_func: Callable[[], Optional[asyncio.Task]]) -> Optional[threading.Thread]:
    """Store the shared references and start the global hotkey listener.

    Args:
        loop: Event loop on which the callbacks schedule their work.
        voice_system_instance: Object providing start_quick_record() and
            stop_quick_record().
        print_scheduler: Callable invoked on the loop with a label and a message.
        task_accessor_func: Returns the currently running command task, if any.

    Returns:
        The started daemon thread, or None if the listener could not be started.
    """
    global voice_system_ref, print_scheduler_ref, main_loop_ref, current_task_accessor
    voice_system_ref, print_scheduler_ref = voice_system_instance, print_scheduler
    main_loop_ref, current_task_accessor = loop, task_accessor_func

    logger.info("Starting global hotkey listener thread (Ctrl+Shift for record, Ctrl+C for interrupt)...")
    try:
        hotkey_listener = keyboard.Listener(on_press=on_press, on_release=on_release)
        worker = threading.Thread(target=hotkey_listener.run, daemon=True, name="HotkeyListenerThread")
        worker.start()
        logger.info("Hotkey listener thread started successfully.")
        return worker
    except Exception as e:
        logger.error(f"Failed to start pynput hotkey listener: {e}", exc_info=True)
        if print_scheduler_ref and main_loop_ref:
            main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", "CRITICAL: Failed to start global hotkey listener!")
        else:
            print("[CRITICAL ERROR] Failed to start global hotkey listener!")
        return None
177 |
--------------------------------------------------------------------------------
/speech/whisper_processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from typing import Optional
3 | import nemo.collections.asr as nemo_asr # Import NeMo
4 | import logging
5 | import warnings
6 | import numpy as np
7 | import soundfile as sf # For writing temporary audio files
8 | import tempfile # For creating temporary files
9 | import os # For file operations like remove
10 |
# Configure logging
logger = logging.getLogger(__name__)
# Only errors from NeMo's own logger are let through.
logging.getLogger("nemo_toolkit").setLevel(logging.ERROR)
warnings.filterwarnings("ignore", category=UserWarning, module='pytorch_lightning.*')
warnings.filterwarnings("ignore", category=FutureWarning)


class ParakeetProcessor:
    """Speech-to-text wrapper around NVIDIA's Parakeet model loaded through NeMo."""

    def __init__(self):
        """Pick the device (CUDA when available, else CPU) and load the model."""
        logger.info("Initializing Parakeet ASR processor...")
        self.asr_model = None  # Stays None until setup_model() succeeds
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.setup_model()

    def setup_model(self):
        """Load the pretrained model, move it to ``self.device``, set eval mode.

        Raises:
            Exception: Whatever NeMo raised while loading; it is logged and
                re-raised so the caller learns that ASR is unavailable.
        """
        logger.info(f"Setting up Parakeet ASR model on device: {self.device}")
        try:
            model_id = "nvidia/parakeet-tdt-0.6b-v2"
            # NeMo downloads the model on first use if it is not cached locally.
            self.asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_id)
            self.asr_model.to(self.device)
            self.asr_model.eval()  # Set the model to evaluation mode

            logger.info(f"Parakeet ASR model ({model_id}) initialized and moved to {self.device}.")

        except Exception as e:
            logger.error(f"Error initializing Parakeet ASR model: {e}", exc_info=True)
            raise  # Re-raise to make the calling code aware of the failure

    def _preprocess_audio(self, audio_data_np: np.ndarray, expected_sample_rate: int = 16000) -> np.ndarray:
        """Return the audio as a mono float32 array with values within [-1, 1].

        Args:
            audio_data_np: Audio samples. A NumPy array is expected; other
                sequences are converted. Two-dimensional input is averaged
                over its shorter axis, which is taken to be the channel axis.
            expected_sample_rate: Accepted for interface symmetry only; this
                method does not resample.

        Returns:
            The processed samples, or an empty float32 array when the input
            is ``None``, empty, or cannot be converted. The caller's array is
            never modified.
        """
        if audio_data_np is None:
            logger.warning("Preprocessing received empty audio data.")
            return np.array([], dtype=np.float32)

        # Convert first: only an ndarray has ``.size``, so doing the emptiness
        # check before this would crash on a plain list.
        if not isinstance(audio_data_np, np.ndarray):
            logger.warning("Audio data is not a NumPy array. Attempting conversion.")
            try:
                audio_data_np = np.array(audio_data_np)
            except Exception as e:
                logger.error(f"Failed to convert audio data to NumPy array: {e}", exc_info=True)
                return np.array([], dtype=np.float32)

        if audio_data_np.size == 0:
            logger.warning("Preprocessing received empty audio data.")
            return np.array([], dtype=np.float32)

        # Ensure it's 1D (mono)
        if audio_data_np.ndim > 1:
            logger.warning(f"Audio data has {audio_data_np.ndim} dimensions. Converting to mono by taking the mean or first channel.")
            if audio_data_np.shape[0] < audio_data_np.shape[1]:  # (channels, samples)
                audio_data_np = np.mean(audio_data_np, axis=0)
            else:  # (samples, channels)
                audio_data_np = np.mean(audio_data_np, axis=1)

        # Convert to float32 if not already
        if audio_data_np.dtype != np.float32:
            if np.issubdtype(audio_data_np.dtype, np.integer):
                # Scale integer PCM by the type's maximum before the cast.
                max_val = np.iinfo(audio_data_np.dtype).max
                audio_data_np = audio_data_np.astype(np.float32) / max_val
            else:
                audio_data_np = audio_data_np.astype(np.float32)

        # Scale down anything that still exceeds [-1, 1].
        abs_max = np.abs(audio_data_np).max()
        if abs_max > 1.0:
            logger.debug(f"Audio data max absolute value {abs_max} > 1.0. Normalizing.")
            # Out-of-place division: a float32 input is still the caller's own
            # buffer at this point and must not be altered.
            audio_data_np = audio_data_np / abs_max
        elif abs_max == 0:
            logger.debug("Audio data is pure silence.")

        logger.debug(f"Preprocessed audio for Parakeet - Shape: {audio_data_np.shape}, Type: {audio_data_np.dtype}, Range: [{audio_data_np.min():.3f}, {audio_data_np.max():.3f}]")
        return audio_data_np

    async def transcribe(self, audio_data: np.ndarray, sample_rate: int = 16000) -> Optional[str]:
        """Transcribe audio samples to text.

        The samples are preprocessed, written to a temporary WAV file and
        passed to the model's ``transcribe`` method, which is called
        synchronously from this coroutine.

        Args:
            audio_data: Audio samples (the model expects 16 kHz mono).
            sample_rate: Sample rate written into the temporary WAV file.

        Returns:
            The transcription, or ``None`` when the model is not loaded, the
            audio is empty, nothing was recognised, or an error occurred.
        """
        if self.asr_model is None:
            logger.error("ASR model not initialized. Cannot transcribe.")
            return None

        # np.size() also works for plain sequences, which the preprocessing
        # step knows how to convert.
        if audio_data is None or np.size(audio_data) == 0:
            logger.info("Received empty audio data for transcription.")
            return None

        audio_processed_np = self._preprocess_audio(audio_data, expected_sample_rate=sample_rate)

        if audio_processed_np.size == 0:
            logger.warning("Audio processing resulted in empty data. Skipping transcription.")
            return None

        temp_file_path = None  # Defined here so the finally block can see it
        try:
            # The model is given a file path, so the samples go to a temporary WAV.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio_file:
                # Record the path right away: if the write below fails, the
                # finally block can still remove the file.
                temp_file_path = tmp_audio_file.name
            # Write after our own handle is closed, so the file is not held
            # open twice.
            sf.write(temp_file_path, audio_processed_np, sample_rate)

            logger.debug(f"Transcribing temporary audio file: {temp_file_path}")

            transcription_results = self.asr_model.transcribe([temp_file_path])

            transcribed_text = None
            if transcription_results and isinstance(transcription_results, list) and len(transcription_results) > 0:
                # The first element is either the text itself or an object
                # carrying it in a ``text`` attribute.
                first_result = transcription_results[0]
                if isinstance(first_result, str):
                    transcribed_text = first_result
                elif hasattr(first_result, 'text'):  # Handles Hypothesis object
                    transcribed_text = first_result.text
                else:
                    logger.warning(f"Unexpected transcription result format: {type(first_result)}. Full result: {transcription_results}")
                    transcribed_text = str(first_result)  # Fallback to string conversion

                logger.info(f"Transcribed by Parakeet: '{transcribed_text}'")

            else:
                logger.info("Parakeet transcription returned no result or an empty result.")

            return transcribed_text

        except Exception as e:
            logger.error(f"Error during Parakeet transcription: {e}", exc_info=True)
            return None
        finally:
            # Clean up the temporary file in all cases (success or error)
            if temp_file_path and os.path.exists(temp_file_path):
                try:
                    os.remove(temp_file_path)
                    logger.debug(f"Temporary audio file {temp_file_path} removed.")
                except Exception as cleanup_e:
                    logger.error(f"Error cleaning up temporary audio file {temp_file_path}: {cleanup_e}", exc_info=True)
178 |
--------------------------------------------------------------------------------
/commands/scrap_command.py:
--------------------------------------------------------------------------------
1 | # commands/scrap_command.py
2 | import subprocess
3 | import tempfile
4 | import os
5 | import logging
6 | import shutil
7 | from datetime import datetime
8 | from pathlib import Path
9 |
10 | try:
11 | import pytesseract
12 | from PIL import Image
13 | PIL_AVAILABLE = True
14 | except ImportError:
15 | PIL_AVAILABLE = False
16 | pytesseract = None
17 | Image = None
18 |
19 | from .base import Command
20 |
21 | logger = logging.getLogger(__name__)
22 |
def is_tool(name):
    """Return True when an executable called `name` can be found on PATH."""
    resolved = shutil.which(name)
    return resolved is not None
26 |
27 | class ScrapCommand(Command):
def __init__(self):
    """Register the command, probe for required tools and create the save folder.

    Screenshots are kept in ``~/Pictures/scraps``, which is created if missing.
    """
    super().__init__(
        name="scrap",
        aliases=["screengrab", "screen grab", "ocrgrab", "grabtext"],
        description="Select screen area, OCR text, copy to clipboard, and save the image.",
        execute=self._execute,
    )
    self.check_dependencies()
    scraps_dir = Path.home() / "Pictures" / "scraps"
    self.pictures_dir = scraps_dir
    os.makedirs(scraps_dir, exist_ok=True)
38 |
def check_dependencies(self):
    """Check for required libraries and system tools.

    Sets ``self.screenshot_tool`` to the first available of
    ``gnome-screenshot``, ``maim`` and ``scrot``. It is left as ``None``
    (command disabled) when the OCR libraries, every screenshot tool, or
    ``xclip`` is missing.
    """
    # Define the attribute up front so it exists on every path, including
    # the early return below.
    self.screenshot_tool = None

    if not PIL_AVAILABLE:
        logger.error("Pillow or Pytesseract not installed. Scrap command disabled.")
        return

    if is_tool("gnome-screenshot"):
        self.screenshot_tool = "gnome-screenshot"
        logger.info("Using 'gnome-screenshot' for screen capture.")
    elif is_tool("maim"):
        self.screenshot_tool = "maim"
        logger.info("Using 'maim' for screen capture. Ensure 'slop' is installed for selection if needed.")
    elif is_tool("scrot"):
        self.screenshot_tool = "scrot"
        logger.info("Using 'scrot' for screen capture.")
    else:
        logger.error("No suitable screenshot tool found (tried gnome-screenshot, maim, scrot). Scrap command disabled.")

    # Without xclip the text cannot be copied anywhere, so the command is
    # disabled even if a screenshot tool was found.
    if not is_tool("xclip"):
        logger.error("'xclip' not found. Scrap command cannot copy to clipboard/primary.")
        self.screenshot_tool = None
61 |
async def _execute(self, args: str) -> str:
    """
    Select a screen area, OCR it, copy the text and save the image.

    The screenshot is written to a temporary PNG, copied into
    ``self.pictures_dir`` under a timestamped name and run through Tesseract.
    The recognised text is pushed to both the clipboard and the primary X
    selection via ``xclip``. The temporary file is removed on every exit path.

    Args:
        args: Ignored.

    Returns:
        A human-readable status or error message.
    """
    if not PIL_AVAILABLE:
        return "Error: Pillow or Pytesseract library not installed."
    if not self.screenshot_tool:
        return "Error: No suitable screenshot tool (gnome-screenshot/maim/scrot) or xclip found."
    if not is_tool("xclip"):
        return "Error: xclip command not found."

    logger.info(f"Starting scrap using {self.screenshot_tool}...")

    # Resolve the command before creating the temp file so an unknown tool
    # cannot leave a stray file behind.
    capture_commands = {
        "gnome-screenshot": ['gnome-screenshot', '-a', '-f'],
        "maim": ['maim', '-s'],
        "scrot": ['scrot', '-s'],
    }
    base_cmd = capture_commands.get(self.screenshot_tool)
    if not base_cmd:
        return "Error: Could not determine screenshot command."

    temp_filename = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img_file:
            temp_filename = temp_img_file.name
        logger.debug(f"Temporary screenshot file: {temp_filename}")

        cmd = base_cmd + [temp_filename]
        logger.debug(f"Running command: {' '.join(cmd)}")

        # NOTE(review): subprocess.run blocks this coroutine (and therefore the
        # event loop) for as long as the user takes to select an area.
        screenshot_success = False
        try:
            subprocess.run(cmd, check=True, capture_output=True, timeout=60)
            # A zero exit code is not enough: without a non-empty image there
            # is nothing to save or OCR, so treat that as a failed capture.
            screenshot_success = (
                os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 0
            )
            if not screenshot_success:
                logger.warning(f"{self.screenshot_tool} exited ok, but temp file is missing or empty.")
        except FileNotFoundError:
            logger.error(f"Screenshot tool '{self.screenshot_tool}' not found during execution.")
            return f"Error: Screenshot tool '{self.screenshot_tool}' failed (not found)."
        except subprocess.CalledProcessError as e:
            logger.info(f"{self.screenshot_tool} exited with error (likely cancelled): {e}")
            stderr_text = (e.stderr or b"").decode('utf-8', errors='ignore')
            lowered = stderr_text.lower()
            if "cancel" not in lowered and "giblib error" not in lowered:
                logger.error(f"Screenshot command failed: {stderr_text}")
        except subprocess.TimeoutExpired:
            logger.error("Screenshot command timed out.")
        except Exception as e:
            logger.error(f"Unexpected error during screenshot: {e}", exc_info=True)

        if not screenshot_success:
            logger.info("Scrap cancelled or failed before OCR.")
            return "Scrap cancelled or failed."

        # Save a copy of the screenshot
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        save_path = self.pictures_dir / f"scrap_{timestamp}.png"
        shutil.copy(temp_filename, save_path)
        logger.info(f"Screenshot saved to {save_path}")

        logger.debug("Screenshot captured successfully. Performing OCR...")
        try:
            # Context manager closes the image file handle once OCR is done.
            with Image.open(temp_filename) as image:
                extracted_text = pytesseract.image_to_string(image).strip()
        except pytesseract.TesseractNotFoundError:
            logger.error("pytesseract error: 'tesseract' command not found.")
            return "Error: Tesseract OCR engine not found. Please install tesseract-ocr."
        except Exception as ocr_err:
            logger.error(f"Error during OCR processing: {ocr_err}", exc_info=True)
            return f"Error during OCR: {ocr_err}"

        if not extracted_text:
            logger.info("OCR completed, but no text was found.")
            return "Scrap complete. No text found in the selected area."

        logger.info(f"OCR successful. Text length: {len(extracted_text)}")
        logger.debug(f"Extracted text (first 100 chars): {extracted_text[:100]}")

        payload = extracted_text.encode('utf-8')
        try:
            for selection in ('clipboard', 'primary'):
                subprocess.run(
                    ['xclip', '-selection', selection],
                    input=payload,
                    check=True,
                    timeout=5
                )
        except FileNotFoundError:
            logger.error("xclip not found during copy step.")
            return "Error: xclip not found. Could not copy OCR text."
        except subprocess.CalledProcessError as e:
            # stderr is not captured for xclip, so e.stderr is normally None.
            xclip_err = (e.stderr or b"").decode('utf-8', errors='ignore')
            logger.error(f"xclip command failed: {xclip_err}")
            return f"Error copying text with xclip: {xclip_err}"
        except subprocess.TimeoutExpired:
            logger.error("xclip command timed out.")
            return "Error: Timeout copying text with xclip."
        except Exception as e:
            logger.error(f"Unexpected error during xclip copy: {e}", exc_info=True)
            return f"Error during text copy: {e}"

        logger.info("Text copied to clipboard and primary selection.")
        return f"Scrap successful. Copied {len(extracted_text)} characters and saved image."

    except Exception as outer_e:
        error_msg = f"Unexpected error during scrap: {str(outer_e)}"
        logger.error(error_msg, exc_info=True)
        return error_msg
    finally:
        # Single cleanup point for the temporary screenshot.
        if temp_filename and os.path.exists(temp_filename):
            try:
                os.remove(temp_filename)
            except OSError as cleanup_e:
                logger.error(f"Failed to clean up temp file {temp_filename}: {cleanup_e}")
200 |
--------------------------------------------------------------------------------
/core/voice_system.py:
--------------------------------------------------------------------------------
1 | # core/voice_system.py
2 | import numpy as np
3 | import warnings
4 | import logging
5 | import pyaudio
6 | import queue
7 | import asyncio
8 | import threading
9 | import psutil
10 | import time
11 | from typing import Optional, Callable, Awaitable, Any, Coroutine
12 |
13 | from webrtcvad import Vad
14 | from speech.whisper_processor import ParakeetProcessor
15 |
16 | logger = logging.getLogger(__name__)
17 |
def is_espeak_running():
    """Return True if any visible process is named, or launched as, espeak."""
    for candidate in psutil.process_iter(['name', 'cmdline']):
        try:
            details = candidate.info
            if details['name'] == 'espeak':
                return True
            argv = details['cmdline']
            # Fall back to the executable path/name in argv[0].
            if argv and 'espeak' in argv[0]:
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Process vanished or is not inspectable; skip it.
            continue
    return False
28 |
29 |
class VoiceCommandSystem:
    """
    Microphone capture pipeline feeding speech recognition.

    A background stream keeps a short rolling buffer of recent audio
    (``prev_frames``). A quick-record stream, started on demand, collects
    audio that is handed to the ASR processor when recording stops. The
    PyAudio callbacks only enqueue raw buffers; worker threads turn them into
    numpy arrays.
    """

    def __init__(self,
                 loop: asyncio.AbstractEventLoop,
                 speak_func: Callable[[str], Awaitable[None]],
                 command_trigger_func: Callable[[str], Coroutine[Any, Any, None]]):
        """
        Set up audio, VAD and ASR, then start the background listener.

        Args:
            loop: Event loop on which transcription tasks are scheduled.
            speak_func: Coroutine function stored as ``self.speak_func``.
            command_trigger_func: Coroutine function awaited with each
                non-empty transcription.

        Raises:
            Exception: Any setup error is logged, resources are cleaned up,
                and the error is re-raised.
        """
        logger.info("Initializing voice command system...")
        self.loop = loop
        self.speak_func = speak_func
        self.command_trigger_func = command_trigger_func
        self.transcript_callback: Optional[Callable[[str, str], None]] = None
        self.input_device_index: Optional[int] = None

        self.asr_processor: Optional[ParakeetProcessor] = None
        self.p: Optional[pyaudio.PyAudio] = None
        self.vad: Optional[Vad] = None

        # One stream, queue, worker thread and "active" flag per capture mode.
        self.background_stream: Optional[pyaudio.Stream] = None
        self.quick_record_stream: Optional[pyaudio.Stream] = None

        self.background_queue: queue.Queue = queue.Queue()
        self.quick_record_queue: queue.Queue = queue.Queue()

        self.background_worker_thread: Optional[threading.Thread] = None
        self.quick_record_worker_thread: Optional[threading.Thread] = None

        self.is_background_listening = threading.Event()
        self.is_quick_recording = threading.Event()

        self.current_audio: list[np.ndarray] = []
        self.prev_frames: list[np.ndarray] = []

        # Audio parameters
        self.sample_rate: int = 16000
        self.channels: int = 1
        self.format: int = pyaudio.paInt16
        self.frame_duration: int = 30  # milliseconds per frame
        self.frame_length: int = 0     # samples per frame, set in setup_vad
        self.chunk_size: int = 0       # set to frame_length in setup_system

        self.prev_frames_duration: float = 0.5
        self.prev_frames_maxlen: int = 0

        self.silence_limit: float = 0.7
        self.min_silence_detections: int = 0

        try:
            self.setup_system()
            self.start_background_listener()
        except Exception as e:
            logger.error(f"Critical error during VoiceCommandSystem setup: {e}", exc_info=True)
            self.cleanup()
            raise

        logger.info("Voice system initialization complete.")

    def setup_system(self):
        """Create PyAudio, pick an input device, load the ASR model and configure VAD."""
        logger.debug("Setting up system components...")
        self.p = pyaudio.PyAudio()
        self._detect_audio_devices()
        self.asr_processor = ParakeetProcessor()
        self.setup_vad()
        self.chunk_size = self.frame_length
        logger.debug("System components setup finished.")

    def _detect_audio_devices(self):
        """
        Choose ``self.input_device_index`` among devices supporting our format.

        The system default input device is preferred. Otherwise the first
        compatible device on the default host API is used.

        Raises:
            RuntimeError: If no compatible input device is found.
        """
        logger.info("Detecting audio input devices...")
        if not self.p:
            logger.error("PyAudio not initialized before _detect_audio_devices.")
            self.p = pyaudio.PyAudio()

        self.input_devices = []
        default_system_device_index: Optional[int] = None
        host_api_info = None
        default_host_api_index = 0

        try:
            default_host_api_info_dict = self.p.get_default_host_api_info()
            default_host_api_index = default_host_api_info_dict['index']
            host_api_info = self.p.get_host_api_info_by_index(default_host_api_index)
            default_system_device_index = host_api_info.get('defaultInputDevice')
            if default_system_device_index == -1:
                default_system_device_index = None
            logger.info(f"Default Host API: {host_api_info.get('name')}, Default System Input Device Index: {default_system_device_index}")
        except Exception as e:
            logger.warning(f"Could not get default device via Host API info: {e}. Will iterate.")
            try:
                default_input_device_info = self.p.get_default_input_device_info()
                default_system_device_index = default_input_device_info['index']
                default_host_api_index = default_input_device_info['hostApi']
                logger.info(f"Found default input device directly: {default_input_device_info.get('name')} (Index: {default_system_device_index})")
            except Exception as e2:
                logger.warning(f"Could not get default input device info directly: {e2}. Will iterate all devices.")
                default_system_device_index = None

        device_count = self.p.get_device_count()
        selected_device_info = None

        for i in range(device_count):
            try:
                device_info = self.p.get_device_info_by_index(i)
                is_on_preferred_api = (device_info.get('hostApi') == default_host_api_index)
                has_input_channels = device_info.get('maxInputChannels', 0) >= self.channels

                if has_input_channels:
                    if (default_host_api_index is not None and is_on_preferred_api) or \
                            (default_host_api_index is None):
                        try:
                            supported = self.p.is_format_supported(
                                rate=self.sample_rate,
                                input_device=device_info['index'],
                                input_channels=self.channels,
                                input_format=self.format
                            )
                            if supported:
                                self.input_devices.append(device_info)
                                if default_system_device_index == i:
                                    self.input_device_index = i
                                    selected_device_info = device_info
                                    logger.info(f"Selecting system default input device: {device_info.get('name')} (Index: {i})")
                                    break
                                elif self.input_device_index is None:
                                    # Provisional pick; keep scanning for the system default.
                                    self.input_device_index = device_info['index']
                                    selected_device_info = device_info
                        except ValueError:
                            pass  # Format not supported
            except Exception as dev_e:
                logger.warning(f"Could not query full device info for index {i}: {dev_e}")

        if self.input_device_index is None:
            final_error_msg = "No compatible audio input devices found."
            logger.error(final_error_msg)
            raise RuntimeError(final_error_msg)
        else:
            if not selected_device_info:
                selected_device_info = self.p.get_device_info_by_index(self.input_device_index)
            logger.info(f"Using audio input device: {selected_device_info.get('name', 'N/A')} (Index: {self.input_device_index})")

    def setup_vad(self):
        """Create the VAD and derive frame/buffer sizes from the audio parameters."""
        logger.debug("Setting up VAD...")
        self.vad = Vad(3)
        self.frame_length = int(self.sample_rate * self.frame_duration / 1000)
        self.prev_frames_maxlen = int(self.prev_frames_duration * self.sample_rate / self.frame_length)
        self.min_silence_detections = int(self.silence_limit * 1000 / self.frame_duration)

    def set_transcript_callback(self, callback: Callable[[str, str], None]):
        """Register the callable invoked with ``(text, source)`` for each transcript."""
        self.transcript_callback = callback

    # --- WORKER THREAD METHODS ---

    def _background_worker(self):
        """CONSUMER for the background listener. Maintains the rolling ``prev_frames`` buffer."""
        # NOTE(review): prev_frames_maxlen is computed in units of frame_length,
        # but the background stream delivers chunk_size * 2 samples per buffer,
        # so this buffer appears to hold about twice prev_frames_duration.
        while self.is_background_listening.is_set():
            try:
                in_data = self.background_queue.get(timeout=0.5)
                if is_espeak_running():
                    continue  # drop audio captured while espeak is running
                audio_chunk = np.frombuffer(in_data, dtype=np.int16)
                self.prev_frames.append(audio_chunk.copy())
                if len(self.prev_frames) > self.prev_frames_maxlen:
                    self.prev_frames.pop(0)
            except queue.Empty:
                continue  # This is normal, just loop again
            except Exception as e:
                logger.error(f"Error in background worker: {e}", exc_info=True)

    def _quick_record_worker(self):
        """CONSUMER for the quick recorder. Appends chunks to ``current_audio`` until drained."""
        while self.is_quick_recording.is_set() or not self.quick_record_queue.empty():
            try:
                in_data = self.quick_record_queue.get(timeout=0.5)
                if is_espeak_running():
                    continue  # drop audio captured while espeak is running
                audio_chunk = np.frombuffer(in_data, dtype=np.int16)
                self.current_audio.append(audio_chunk.copy())
            except queue.Empty:
                if not self.is_quick_recording.is_set():
                    break  # Exit if recording is stopped and queue is empty
            except Exception as e:
                logger.error(f"Error in quick record worker: {e}", exc_info=True)

    # --- CALLBACK METHODS (PRODUCERS) ---

    def background_callback(self, in_data, frame_count, time_info, status_flags):
        """PRODUCER: Puts background audio data into a queue. Must be fast."""
        if status_flags:
            logger.warning(f"Background audio input status flags non-zero: {status_flags}")
        self.background_queue.put(in_data)
        return (None, pyaudio.paContinue)

    def quick_record_callback(self, in_data, frame_count, time_info, status_flags):
        """PRODUCER: Puts quick record audio data into a queue. Must be fast."""
        if status_flags:
            logger.warning(f"Quick record audio input status flags non-zero: {status_flags}")
        self.quick_record_queue.put(in_data)
        return (None, pyaudio.paContinue)

    # --- STREAM CONTROL METHODS ---

    def start_background_listener(self):
        """
        Open the background stream and start its worker; no-op if already running.

        Raises:
            Exception: Whatever opening the stream raises. The listening flag
                is cleared first so a later call can retry.
        """
        if self.is_background_listening.is_set():
            return
        logger.debug("Starting background listener...")
        self.is_background_listening.set()

        # Open the stream before spawning the worker: if the device cannot be
        # opened there is no thread to unwind and the flag is simply reset.
        try:
            self.background_stream = self.p.open(
                format=self.format, channels=self.channels, rate=self.sample_rate,
                input=True, input_device_index=self.input_device_index,
                frames_per_buffer=self.chunk_size * 2,  # A slightly larger buffer is safe
                stream_callback=self.background_callback
            )
        except Exception:
            self.is_background_listening.clear()
            raise

        self.background_worker_thread = threading.Thread(target=self._background_worker)
        self.background_worker_thread.start()
        logger.debug("Background listener stream started.")

    def stop_background_listener(self):
        """Close the background stream and join its worker; no-op if not running."""
        if not self.is_background_listening.is_set():
            return
        logger.debug("Stopping background listener...")
        self.is_background_listening.clear()

        if self.background_stream:
            self.background_stream.stop_stream()
            self.background_stream.close()
            self.background_stream = None

        if self.background_worker_thread:
            self.background_worker_thread.join()
            self.background_worker_thread = None
        logger.debug("Background listener stopped.")

    def start_quick_record(self):
        """
        Switch from background listening to quick recording.

        The recording is seeded with the rolling ``prev_frames`` buffer.

        Returns:
            False if a quick recording is already in progress, True once the
            stream has been opened.

        Raises:
            Exception: Whatever opening the stream raises. Recording state is
                rolled back and the background listener restarted first.
        """
        if self.is_quick_recording.is_set():
            return False
        logger.info("Attempting to start quick recording...")
        self.stop_background_listener()

        self.is_quick_recording.set()
        self.current_audio = list(self.prev_frames)
        logger.debug(f"Initialized quick recording with {len(self.current_audio)} frames.")

        try:
            self.quick_record_stream = self.p.open(
                format=self.format, channels=self.channels, rate=self.sample_rate,
                input=True, input_device_index=self.input_device_index,
                frames_per_buffer=self.chunk_size,
                stream_callback=self.quick_record_callback
            )
        except Exception:
            # Undo the mode switch so the next attempt is not rejected as
            # "already recording", then surface the original error.
            self.is_quick_recording.clear()
            self.current_audio = []
            try:
                self.start_background_listener()
            except Exception as restart_err:
                logger.error(f"Could not restart background listener: {restart_err}", exc_info=True)
            raise

        self.quick_record_worker_thread = threading.Thread(target=self._quick_record_worker)
        self.quick_record_worker_thread.start()
        logger.info("Quick recording stream started successfully.")
        return True

    def stop_quick_record(self):
        """
        Stop quick recording, schedule transcription, and resume background listening.

        No-op if no quick recording is in progress.
        """
        if not self.is_quick_recording.is_set():
            return
        logger.info("Stopping quick recording...")
        self.is_quick_recording.clear()

        if self.quick_record_stream:
            self.quick_record_stream.stop_stream()
            self.quick_record_stream.close()
            self.quick_record_stream = None

        if self.quick_record_worker_thread:
            self.quick_record_worker_thread.join()  # Wait for worker to process all data
            self.quick_record_worker_thread = None

        if self.current_audio:
            try:
                combined_audio_data = np.concatenate(self.current_audio)
                self._schedule_process_speech(combined_audio_data)
            except ValueError as e:
                logger.error(f"Error concatenating audio chunks: {e}", exc_info=True)
            finally:
                self.current_audio = []
        else:
            logger.info("No audio was captured.")
            if self.transcript_callback:
                self.loop.call_soon_threadsafe(self.transcript_callback, "...", "Voice")

        self.start_background_listener()  # Restart background listener
        logger.debug("Background listener restarted.")

    # --- ASYNC SPEECH PROCESSING ---

    def _schedule_process_speech(self, audio_data: np.ndarray):
        """Schedule ``process_speech`` on the event loop; log if the loop is not running."""
        if self.loop and self.loop.is_running():
            self.loop.call_soon_threadsafe(
                lambda: self.loop.create_task(self.process_speech(audio_data))
            )
        else:
            logger.warning("Event loop is not running; recorded audio was not transcribed.")

    async def process_speech(self, audio_data: np.ndarray):
        """
        Transcribe ``audio_data`` and trigger command processing.

        The transcript callback receives the text (or ``"..."`` when empty).
        Non-empty text is passed to ``command_trigger_func``. Errors are
        logged and reported through the transcript callback.
        """
        if not self.asr_processor or not self.command_trigger_func:
            logger.error("ASR processor or command trigger not initialized.")
            return

        try:
            transcribed_text = await self.asr_processor.transcribe(audio_data)

            if self.transcript_callback:
                text_to_print = transcribed_text if transcribed_text else "..."
                self.transcript_callback(text_to_print, "Voice")

            if transcribed_text:
                logger.info(f"Transcription successful: '{transcribed_text}'. Triggering command.")
                await self.command_trigger_func(transcribed_text)
            else:
                logger.info("Transcription returned no text.")
        except Exception as e:
            logger.error(f"Error during async speech processing: {e}", exc_info=True)
            if self.transcript_callback:
                self.transcript_callback("[Error processing speech]", "Error")

    def cleanup(self):
        """Stop both capture modes and terminate PyAudio."""
        logger.info("Cleaning up VoiceCommandSystem resources...")
        self.stop_quick_record()
        self.stop_background_listener()

        if self.p:
            self.p.terminate()
            self.p = None
        logger.info("Voice command system resources released.")
360 |
--------------------------------------------------------------------------------
/commands/computer_command.py:
--------------------------------------------------------------------------------
1 | # commands/computer_command.py
2 | import subprocess
3 | import json
4 | from typing import AsyncGenerator, Dict, List, Optional
5 | import logging
6 | import os
7 | from pathlib import Path
8 | from .base import Command
9 | import aiohttp
10 | import asyncio
11 | import shutil
12 |
13 | from cli.output import schedule_print, speak
14 |
15 | logger = logging.getLogger(__name__)
16 | logger.setLevel(logging.DEBUG)
17 |
18 | # --- is_tool Helper ---
def is_tool(name):
    """Return True when ``name`` resolves to an executable via ``shutil.which``."""
    resolved = shutil.which(name)
    if resolved is None:
        return False
    return True
21 |
22 | # --- ToolRegistry Class (No changes needed from previous version) ---
class ToolRegistry:
    """
    Lookup tables for launchable applications and open windows.

    ``apps`` maps a lower-cased desktop-entry name to its launch command.
    ``active_windows`` maps a ``wmctrl`` window id to its lower-cased title.
    ``command_history`` keeps the ten most recent shell commands.
    """

    def __init__(self):
        self.apps: Dict[str, str] = {}
        self.active_windows: Dict[str, str] = {}
        self.terminal_apps = ['konsole', 'gnome-terminal', 'xterm', 'terminator', 'alacritty', 'kitty']
        self.command_history = []
        self.update_installed_apps()
        self.update_active_windows()

    @staticmethod
    def _parse_desktop_entry(content):
        """
        Extract ``(name, exec_path, nodisplay)`` from .desktop file text.

        Only keys in the ``[Desktop Entry]`` group (or before any group
        header) are used, so ``[Desktop Action ...]`` groups cannot overwrite
        the main entry's name and command. ``name`` is lower-cased and
        ``exec_path`` is cut at the first ``%`` field code. Either is None
        when absent.
        """
        name = None
        exec_path = None
        nodisplay = False
        in_main_group = True
        for line in content.splitlines():
            if line.startswith('['):
                in_main_group = line.strip() == '[Desktop Entry]'
                continue
            if not in_main_group:
                continue
            if line.startswith('Name='):
                if name is None:
                    name = line.split('=', 1)[1].lower().strip()
            elif line.startswith('Exec='):
                if exec_path is None:
                    exec_path = line.split('=', 1)[1].split('%')[0].strip()
            elif line.startswith('NoDisplay=true'):
                nodisplay = True
        return name, exec_path, nodisplay

    def update_installed_apps(self):
        """Rebuild ``apps`` from the .desktop files under /usr/share/applications."""
        try:
            result = subprocess.run(['find', '/usr/share/applications', '-name', '*.desktop'], capture_output=True, text=True, check=False)
            if result.returncode != 0:
                logger.warning(f"Finding .desktop files failed: {result.stderr}")
                return
            self.apps.clear()
            for desktop_file in result.stdout.splitlines():
                try:
                    with open(desktop_file, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()
                    name, exec_path, nodisplay = self._parse_desktop_entry(content)
                    if name and exec_path and not nodisplay:
                        cmd_base = exec_path.split()[0]
                        # Keep entries whose command is a path or is found on PATH.
                        if '/' in cmd_base or is_tool(cmd_base):
                            self.apps[name] = exec_path
                except Exception as file_e:
                    logger.warning(f"Could not parse desktop file {desktop_file}: {file_e}")
        except Exception as e:
            logger.error(f"Failed to update installed apps: {e}", exc_info=True)

    def update_active_windows(self):
        """Rebuild ``active_windows`` from ``wmctrl -l``; left empty on any failure."""
        if not is_tool('wmctrl'):
            logger.error("wmctrl not found.")
            self.active_windows.clear()
            return
        try:
            result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=False)
            self.active_windows.clear()
            if result.returncode != 0:
                logger.warning(f"wmctrl command failed: {result.stderr}.")
                return
            for line in result.stdout.splitlines():
                # The first field is used as the window id and the fourth as the title.
                parts = line.split(None, 3)
                if len(parts) >= 4:
                    self.active_windows[parts[0]] = parts[3].lower()
        except Exception as e:
            logger.error(f"Failed to update active windows: {e}")
            self.active_windows.clear()

    def find_app(self, query: str) -> Optional[str]:
        """
        Return the launch command best matching ``query``, or None.

        Generic terminal words resolve to the first known terminal emulator,
        matched by entry name or by the executable in its launch command.
        Other queries match an entry name exactly, then by substring.
        """
        query = query.lower()
        if query in ['shell', 'terminal', 'command prompt', 'cmd']:
            for terminal in self.terminal_apps:
                if terminal in self.apps:
                    return self.apps[terminal]
                # Entry names are display names (e.g. "terminal"), so also
                # match on the executable named in the launch command.
                for exec_path in self.apps.values():
                    tokens = exec_path.split()
                    if tokens and os.path.basename(tokens[0]) == terminal:
                        return exec_path
            logger.warning("Could not find a known terminal application.")
            return None
        if query in self.apps:
            return self.apps[query]
        for name, exec_path in self.apps.items():
            if query in name:
                logger.debug(f"Partial match for '{query}': Found '{name}' -> {exec_path}")
                return exec_path
        logger.debug(f"No application found matching query: '{query}'")
        return None

    def find_window(self, query: str) -> Optional[str]:
        """
        Return the id of the window whose title best matches ``query``, or None.

        An exact title match wins. Otherwise titles containing the query are
        ranked: shorter titles score higher, with a bonus when the title
        starts with the query.
        """
        query = query.lower()
        if not self.active_windows:
            logger.warning("Active window list empty.")
            return None
        for window_id, title in self.active_windows.items():
            if query == title:
                return window_id
        best_match_id = None
        best_match_score = None
        for window_id, title in self.active_windows.items():
            if query in title:
                score = 100 - len(title)
                score += 50 if title.startswith(query) else 0
                # The score goes negative for long titles, so compare against
                # "no candidate yet" rather than a fixed floor of zero.
                if best_match_score is None or score > best_match_score:
                    best_match_id = window_id
                    best_match_score = score
        if best_match_id:
            logger.debug(f"Window match for '{query}': Found '{self.active_windows[best_match_id]}' (ID: {best_match_id})")
            return best_match_id
        logger.debug(f"No active window found matching query: '{query}'")
        return None

    def add_command_history(self, command: str):
        """Append ``command`` and keep only the ten most recent entries."""
        self.command_history.append(command)
        del self.command_history[:-10]
126 |
127 |
128 | # --- ComputerCommand Class ---
129 | class ComputerCommand(Command):
def __init__(self):
    """Register the "computer" command and set up its tools, model and prompts."""
    super().__init__(
        name="computer",
        aliases=[],
        description="Execute various computer commands using LLM",
        execute=self._execute
    )
    self.tools = ToolRegistry()
    self.llm_model = "mistral"
    self.ollama_base_url = "http://localhost:11434"

    # Prompt for questions about the currently highlighted text.
    self.query_prompt = """Context of highlighted text: "{highlighted}"
Now for the User Query: "{query}"
Analyze the highlighted text and answer the query. Keep responses clear and concise. If the query isn't directly related to the highlighted text, just answer the question."""

    # Prompt that asks for exactly one bash command or an "ANSWER:" reply.
    self.shell_prompt = """You are a desktop command assistant that outputs ONLY a single BASH command suitable for execution via subprocess.run.
Rules:
1. Task Handling: If the request asks for information obtainable via a bash command (e.g., disk space, list files, current directory), output the command. If the request is a general question or cannot be answered by a simple command, respond conversationally using ONLY plain text (no command output). Start conversational responses with 'ANSWER:'. Provide ONLY the command itself (e.g., `ls -l`) or the conversational answer (e.g., `ANSWER: I cannot perform that action.`). Do NOT add explanations before the command or ANSWER:.
2. Safety: AVOID destructive commands (rm, mv without care, mkfs, etc.). Prefer read-only commands (ls, pwd, df, ps, top, cat, head, tail, grep, find). Do NOT create files or directories unless specifically asked and safe. Do NOT include `sudo`. Do NOT include `&& espeak ...`.
3. Formatting: Output exactly ONE line containing either the bash command or the `ANSWER:` prefixed conversational response. Remove any markdown formatting like backticks (`).
Examples: User: "check disk space" -> Assistant: df -h | User: "show current directory" -> Assistant: pwd | User: "list files" -> Assistant: ls -lah | User: "what is the capital of france" -> Assistant: ANSWER: The capital of France is Paris. | User: "delete all my files" -> Assistant: ANSWER: I cannot perform destructive actions like deleting all files.
Current state (informational only, do not rely on for paths): Working Directory (approximated): {working_dir} | Previous Commands (for context): {command_history}
User request: {query}
Assistant:"""

    # Per-tool prompts, keyed by the tool type from _determine_tool_type.
    # NOTE(review): the "tag" wording below reads as if markup was stripped
    # from these prompts; confirm against the response parser.
    self.tool_prompts = {
        'open': """You are an application launcher assistant. Output ONLY an app_name tag or NOT_FOUND. Match the user request against the list of installed applications. For shell/terminal, use an installed emulator name.
Installed applications: {apps}
User request: {query}
Assistant:""",
        'window': """You are a window management assistant. Output ONLY a window_query or window_query tag, or NOT_FOUND. Match the user request against the list of active windows. Use lowercase keywords.
Active windows (lowercase titles): {windows}
User request: {query}
Assistant:"""
    }
166 |
167 | # get_available_models (remains same)
async def get_available_models(self) -> Optional[List[str]]:
    """
    Ask the Ollama server which models it has.

    Returns:
        The model names sorted alphabetically, or None when the list is
        empty, the server answers with a non-200 status, or the request
        fails.
    """
    url = f"{self.ollama_base_url}/api/tags"
    logger.debug(f"Fetching models from {url}")
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=5) as response:
                if response.status != 200:
                    logger.error(f"Ollama API model request failed: {response.status} - {await response.text()}")
                    return None
                payload = await response.json()
                model_names = [
                    entry.get("name")
                    for entry in payload.get("models", [])
                    if entry.get("name")
                ]
                model_names.sort()
                logger.info(f"Fetched models: {model_names}")
                return model_names or None
    except (aiohttp.ClientConnectorError, asyncio.TimeoutError) as e:
        logger.error(f"Cannot connect to Ollama at {url}: {e}")
        return None
    except Exception as e:
        logger.error(f"Error fetching Ollama models: {e}", exc_info=True)
        return None
189 |
190 | # set_llm_model (remains same)
def set_llm_model(self, model_name):
    """Use ``model_name`` for subsequent LLM requests and log the change."""
    setattr(self, "llm_model", model_name)
    logger.info(f"ComputerCommand LLM model set to: {model_name}")
194 |
195 | # _execute (FIXED version)
async def _execute(self, text: str) -> None:
    """
    Route ``text`` to the matching handler.

    'query' and 'shell' requests go to their dedicated handlers. For 'open'
    and 'window' requests the LLM is asked for a tool tag, which is parsed
    and executed. Failures are logged and reported through
    ``schedule_print`` rather than raised.
    """
    logger.debug(f"ComputerCommand executing with text: '{text}'")
    try:
        tool_type = self._determine_tool_type(text)
        logger.debug(f"Determined tool type: {tool_type}")

        if tool_type == 'query':
            await self._handle_text_query(text)
        elif tool_type == 'shell':
            # The classifier matched the leading keyword case-insensitively on
            # the stripped text. Drop it by length so the rest of the request
            # keeps its original case (file names, arguments).
            command_query = text.strip()[len('shell'):].strip()
            await self._handle_shell_command(command_query)
        else:  # 'open' or 'window'
            llm_response_tag = "[No response]"
            # Only the first response from the generator is used.
            async for resp in self._get_llm_tool_response(text, tool_type):
                llm_response_tag = resp
                break

            if llm_response_tag.startswith("[Error:") or llm_response_tag == "[No response]":
                schedule_print("Error", f"LLM failed for '{tool_type}': {llm_response_tag}")
                return

            tool_name, params = self._parse_tool(llm_response_tag)
            logger.debug(f"Parsed tool: {tool_name}, Params: {params}")

            if not tool_name:
                schedule_print("Error", f"LLM gave invalid tool response: {llm_response_tag}")
                return

            if params is not None and params.upper() == "NOT_FOUND":
                msg = f"Could not find target for {tool_type} '{text}'."
                schedule_print("System", msg)
                return

            await self._execute_tool(tool_name, params if params is not None else "")
    except Exception as e:
        error_msg = f"ComputerCommand execution failed: {str(e)}"
        logger.error(error_msg, exc_info=True)
        schedule_print("Error", error_msg)
235 |
236 | # _determine_tool_type (remains same)
def _determine_tool_type(self, query: str) -> str:
    """
    Classify ``query`` as 'shell', 'open', 'window' or 'query'.

    Matching is done on the lower-cased, stripped text: a leading "shell"
    selects 'shell', a leading launch verb selects 'open', and a leading
    window verb selects 'window' (refreshing the active window list first).
    Anything else is a 'query'.
    """
    normalized = query.lower().strip()
    if normalized.startswith('shell'):
        return 'shell'

    launch_prefixes = tuple(verb + " " for verb in ['open', 'launch', 'start', 'run'])
    if normalized.startswith(launch_prefixes):
        return 'open'

    window_prefixes = tuple(
        verb + " "
        for verb in ['goto', 'go to', 'switch to', 'focus', 'close', 'quit', 'exit']
    )
    if normalized.startswith(window_prefixes):
        self.tools.update_active_windows()
        return 'window'

    logger.debug(f"Query '{query}' classified as 'query' type.")
    return 'query'
248 |
249 | # _handle_shell_command (remains same as last correction)
async def _handle_shell_command(self, command_query: str) -> None:
    """
    Ask the LLM for a shell command for ``command_query`` and run it.

    A reply starting with "ANSWER:" is printed and spoken instead of being
    executed. Any other non-empty reply is run through the shell with a
    15 second timeout, its result is printed, and the command is added to
    the history.
    """
    if not command_query:
        schedule_print("System", "No shell command requested.")
        return

    logger.debug("Handling shell command request...")
    prompt = self.shell_prompt.format(
        working_dir=os.getcwd(),
        command_history="\n".join(self.tools.command_history[-3:]),
        query=command_query,
    )

    # Only the first item yielded by the generator is used.
    llm_output_line = "[No response]"
    async for first_reply in self._ollama_generate(prompt, stream=False):
        llm_output_line = first_reply
        break

    if llm_output_line.startswith("[Error:") or llm_output_line == "[No response]":
        schedule_print("Error", f"LLM failed for shell command: {llm_output_line}")
        return

    llm_output_line = llm_output_line.strip()
    logger.debug(f"LLM output for shell: '{llm_output_line}'")

    if llm_output_line.startswith("ANSWER:"):
        answer_text = llm_output_line[len("ANSWER:"):].strip()
        schedule_print("LLM", answer_text)
        if answer_text:
            await speak(answer_text)
        return

    if not llm_output_line:
        schedule_print("System", "LLM returned empty response.")
        return

    command_to_run = llm_output_line
    schedule_print("System", f"Suggested command: `{command_to_run}`")
    schedule_print("System", "Executing...")

    def run_in_shell():
        # SECURITY NOTE: this runs model-generated text through the shell
        # with no confirmation step; the prompt's safety rules are the only
        # guard.
        return subprocess.run(
            command_to_run,
            shell=True,
            capture_output=True,
            text=True,
            check=False,
            timeout=15
        )

    try:
        # Run in the default executor so the event loop is not blocked.
        completed = await asyncio.get_running_loop().run_in_executor(None, run_in_shell)
        report = f"Return Code: {completed.returncode}\n"
        stdout_clean = completed.stdout.strip()
        stderr_clean = completed.stderr.strip()
        if stdout_clean:
            report += f"Output:\n{stdout_clean}\n"
        if stderr_clean:
            report += f"Error Output:\n{stderr_clean}"
        schedule_print("System", report.strip())
        self.tools.add_command_history(command_to_run)
    except subprocess.TimeoutExpired:
        schedule_print("Error", f"Command timed out: `{command_to_run}`")
    except Exception as exec_e:
        schedule_print("Error", f"Failed to execute command `{command_to_run}`: {exec_e}")
310 |
311 | # _handle_text_query (remains same as last correction)
async def _handle_text_query(self, query: str) -> None:
    """Answer ``query`` about the text currently highlighted on screen.

    Reads the X11 PRIMARY selection with ``xclip``, formats it together with
    ``query`` into ``self.query_prompt`` and streams the LLM reply.  Every
    chunk is printed; every non-empty chunk that is not an ``[Error:``
    marker is also spoken.

    Args:
        query: The question to ask about the highlighted text.
    """
    try:
        logger.debug("Getting highlighted text...")
        # Run xclip in the default executor so a slow or unresponsive
        # selection owner cannot freeze the event loop; the timeout bounds
        # how long we are willing to wait for it.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(
                ['xclip', '-o', '-selection', 'primary'],
                capture_output=True,
                text=True,
                check=False,
                timeout=5,
            ),
        )
        if result.returncode != 0:
            msg = "Could not get highlighted text." if "Error:" in result.stderr else "No text highlighted."
            schedule_print("System", msg)
            return

        highlighted = result.stdout.strip()
        if not highlighted:
            schedule_print("System", "No text is highlighted.")
            return

        logger.debug(f"Processing query: '{query}' with context: '{highlighted[:100]}...'")
        prompt = self.query_prompt.format(highlighted=highlighted, query=query)
        response_chunks = []  # Collected only for the debug log below

        try:
            async for chunk_text in self._ollama_generate(prompt, stream=True):
                schedule_print("LLM", chunk_text)
                response_chunks.append(chunk_text)
                speak_text = chunk_text.strip()
                # Error markers are shown on screen but never read aloud.
                if speak_text and not speak_text.startswith("[Error:"):
                    await speak(speak_text)
        finally:
            # Log whatever was received, even if streaming was interrupted.
            logger.debug(f"Full LLM response for query '{query}': {''.join(response_chunks)}")
    except FileNotFoundError:
        msg = "Error: 'xclip' command not found."
        logger.error(msg)
        schedule_print("Error", msg)
    except subprocess.TimeoutExpired:
        msg = "Error: 'xclip' timed out while reading the highlighted text."
        logger.error(msg)
        schedule_print("Error", msg)
    except Exception as e:
        error_msg = f"Query processing failed: {str(e)}"
        logger.error(error_msg, exc_info=True)
        schedule_print("Error", error_msg)
348 |
349 | # _get_llm_tool_response (remains same)
async def _get_llm_tool_response(self, query: str, tool_type: str) -> AsyncGenerator[str, None]:
    """Ask the LLM for a tool invocation matching ``query``.

    Looks up the prompt template registered for ``tool_type`` in
    ``self.tool_prompts``, fills it with the keys of ``self.tools.apps``, the
    values of ``self.tools.active_windows`` and the query, and yields
    whatever the non-streaming LLM call produces.  When no template exists
    for ``tool_type`` a single ``[Error: No prompt]`` marker is yielded.
    """
    template = self.tool_prompts.get(tool_type)
    if template:
        # Refresh before building the prompt so the window list is current.
        self.tools.update_active_windows()
        known_apps = "\n".join(self.tools.apps.keys())
        open_windows = "\n".join(self.tools.active_windows.values())
        prompt = template.format(
            apps=known_apps or "None",
            windows=open_windows or "None",
            query=query,
        )
        logger.debug(f"Prompt for {tool_type}:\n{prompt}")

        async for reply in self._ollama_generate(prompt, stream=False):
            logger.debug(f"LLM response for {tool_type}: {reply}")
            yield reply
    else:
        logger.error(f"No prompt for tool {tool_type}")
        yield "[Error: No prompt]"
366 |
367 | # _parse_tool (remains same)
def _parse_tool(self, response: str) -> tuple[Optional[str], Optional[str]]:
    """Extract a ``<tool>params</tool>`` invocation from an LLM reply.

    The reply (ignoring surrounding whitespace) must consist of exactly one
    element whose tag is ``open``, ``goto``, ``close`` or ``shell``.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        ``(tool, params)`` with ``params`` stripped of whitespace, or
        ``(None, None)`` when no supported tag pair wraps the reply or the
        element body is empty.
    """
    response = response.strip()
    for tool in ('open', 'goto', 'close', 'shell'):
        start_tag = f"<{tool}>"
        # The closing tag must include "</"; matching only "tool>" left a
        # stray "</" at the end of the extracted parameters.
        end_tag = f"</{tool}>"
        if response.startswith(start_tag) and response.endswith(end_tag):
            params = response[len(start_tag):-len(end_tag)].strip()
            return (tool, params) if params else (None, None)
    logger.debug(f"Could not parse tool tag from: {response}")
    return None, None
378 |
379 | # _execute_tool (remains same)
async def _execute_tool(self, tool_type: str, params: str) -> None:
    """Execute the selected tool and print a status line describing the outcome.

    Supported tools:
        * ``open``  - launch the application ``self.tools.find_app`` resolves
          for ``params``.
        * ``goto``  - activate the window ``self.tools.find_window`` resolves
          for ``params`` (``wmctrl -i -a``).
        * ``close`` - close that window (``wmctrl -i -c``).

    Failures are never raised to the caller; they are reported through
    ``schedule_print`` with the ``"Error"`` message type.

    Args:
        tool_type: Name of the tool to run.
        params: Application or window description to act on.
    """
    import shlex  # Local import: only needed to tokenize launch commands.

    logger.info(f"Executing tool '{tool_type}' with params '{params}'")
    # Initial status message (printed immediately)
    schedule_print("System", f"Attempting action: {tool_type} '{params}'...")
    status_msg = f"Tool '{tool_type}' finished."  # Default success
    error_occurred = False

    # tool name -> (wmctrl action flag, verb on success, verb in error text)
    window_actions = {
        'goto': ('-a', "Focused", "focusing"),
        'close': ('-c', "Closed", "closing"),
    }

    try:
        if tool_type == 'open':
            exec_path = self.tools.find_app(params)
            if exec_path:
                try:
                    # shlex keeps quoted arguments (e.g. paths containing
                    # spaces) together, which a plain str.split() would break.
                    # Popen returns immediately, so launching a GUI app does
                    # not block.
                    subprocess.Popen(shlex.split(exec_path), start_new_session=True)
                    status_msg = f"Attempted launch: '{params}' (Command: {exec_path})."
                except Exception as e:
                    status_msg = f"Error launching '{params}': {e}"
                    error_occurred = True
            else:
                # Not strictly an error, but reported as one to the user.
                status_msg = f"Application matching '{params}' not found."
                error_occurred = True

        elif tool_type in window_actions:
            wmctrl_flag, done_verb, doing_verb = window_actions[tool_type]
            self.tools.update_active_windows()
            window_id = self.tools.find_window(params)
            if window_id:
                try:
                    # wmctrl is run synchronously; the timeout caps the wait.
                    subprocess.run(['wmctrl', '-i', wmctrl_flag, window_id], check=True, timeout=3)
                    status_msg = f"{done_verb} window matching '{params}'."
                except Exception as e:
                    status_msg = f"Error {doing_verb} '{params}': {e}"
                    error_occurred = True
            else:
                status_msg = f"Window matching '{params}' not found."
                error_occurred = True

        else:
            status_msg = f"Unknown tool type: {tool_type}"
            error_occurred = True

    except Exception as e:
        # Catch unexpected errors during tool logic
        status_msg = f"Tool execution failed unexpectedly: {str(e)}"
        logger.error(status_msg, exc_info=True)
        error_occurred = True

    # Print final status message using the appropriate type.
    # NOTE: the outcome is only printed, not spoken.
    schedule_print("Error" if error_occurred else "System", status_msg)
449 |
450 | # _ollama_generate (remains same as last correction)
async def _ollama_generate(self, prompt: str, stream: bool) -> AsyncGenerator[str, None]:
    """POST ``prompt`` to Ollama's ``/api/generate`` endpoint and yield the reply.

    Failures are reported in-band as yielded ``"[Error: ...]"`` strings
    rather than raised, so callers must check for that prefix.

    Args:
        prompt: Fully formatted prompt text.
        stream: When true, the reply is read line by line and re-chunked so
            each yielded piece ends at ``.``, ``!``, ``?`` or a newline
            (any trailing remainder is yielded last).  When false, the whole
            reply is yielded once, or ``"[Warning: LLM empty]"`` if it is
            blank.

    Yields:
        Reply text, or a bracketed error/warning marker.
    """
    url = f"{self.ollama_base_url}/api/generate"
    payload = {
        "model": self.llm_model,
        "prompt": prompt,
        "stream": stream,
    }
    # Same 60 s total budget as before, expressed as the ClientTimeout
    # object aiohttp expects instead of a bare number.
    request_timeout = aiohttp.ClientTimeout(total=60)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, json=payload, timeout=request_timeout) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Ollama API error {response.status}: {error_text}")
                    yield f"[Error: API {response.status}]"
                    return

                if not stream:
                    try:
                        data = await response.json()
                        response_text = data.get('response', '').strip()
                        yield response_text if response_text else "[Warning: LLM empty]"
                    except json.JSONDecodeError:
                        logger.error("Non-stream JSON decode failed.")
                        yield "[Error: Invalid LLM JSON]"
                    return

                # Streaming: each non-empty line is one JSON record.
                buffer = ""
                async for line in response.content:
                    if not line:
                        continue
                    try:
                        data = json.loads(line.decode('utf-8'))
                        chunk_text = data.get('response', '')
                        if chunk_text:
                            buffer += chunk_text
                            # Emit every complete sentence/line held in the buffer.
                            while True:
                                split_idx = min(
                                    (idx for idx in map(buffer.find, ('.', '!', '?', '\n')) if idx != -1),
                                    default=-1,
                                )
                                if split_idx == -1:
                                    break
                                yield buffer[:split_idx + 1]
                                buffer = buffer[split_idx + 1:]
                        if data.get('done'):
                            break
                    except json.JSONDecodeError:
                        # A single bad record is reported but does not end the stream.
                        logger.warning(f"Failed decode: {line}")
                        yield "[Error: Invalid JSON]"
                    except Exception as stream_e:
                        logger.error(f"Stream err: {stream_e}")
                        yield "[Error: Stream]"
                        return

                # Flush the unterminated remainder, including when the stream
                # ended without a final "done" record.
                if buffer:
                    yield buffer
    except aiohttp.ClientConnectorError as e:
        logger.error(f"Ollama Connect {self.ollama_base_url}: {e}")
        yield "[Error: Cannot connect]"
    except asyncio.TimeoutError:
        logger.error("Ollama Timeout")
        yield "[Error: Ollama timeout]"
    except Exception as e:
        logger.error(f"Ollama generate call failed: {e}", exc_info=True)
        yield "[Error: LLM call failed]"
512 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import asyncio
4 | import sys
5 | import logging
6 | from typing import List, Optional, Callable, Any, Coroutine
7 | import textwrap
8 | import subprocess
9 | import re # <<< For input normalization
10 |
11 | # Third-party imports
12 | from prompt_toolkit import PromptSession
13 | from prompt_toolkit.patch_stdout import patch_stdout
14 | # Optional: For history and suggestions
15 | # from prompt_toolkit.history import FileHistory
16 | # from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
17 |
18 | # Project imports
19 | from cli.output import schedule_print, print_consumer, print_queue, safe_print, speak
20 | from cli.completer import CLICompleter, ollama_models_for_completion
21 | import hotkey_listener # Keep import
22 | from commands.command_processor import CommandProcessor
23 | from commands.computer_command import ComputerCommand # Keep direct import if needed
24 | from core.voice_system import VoiceCommandSystem
25 |
# Configure logging: INFO level, each record prefixed with timestamp, logger name and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Global Variables ---
# Model names offered for selection/completion; starts with a single default
# entry and is replaced by async_main once models have been fetched.
ollama_models_list: List[str] = ["mistral"] # Default model
# The registered "computer" command; assigned by async_main after the
# CommandProcessor has been created.
computer_command_instance: Optional[ComputerCommand] = None
# --- Global for tracking the current command task ---
# Task of the command currently executing (voice or typed); None when idle.
current_command_task: Optional[asyncio.Task] = None
35 |
36 | # --- Accessor function for the hotkey listener ---
def get_current_task() -> Optional[asyncio.Task]:
    """Return the command task tracked in ``current_command_task``.

    The value is ``None`` when no command task is being tracked.  The
    function is handed to the hotkey listener so it can look up the task at
    the moment a hotkey fires.
    """
    # Reading a module-level name needs no ``global`` declaration.
    return current_command_task
41 |
42 | # --- Voice Command Handler ---
async def handle_voice_command(text: str, command_processor: CommandProcessor):
    """Run a transcribed voice command as the tracked command task.

    A command task that is still running is cancelled first (waiting at most
    half a second for it to finish), then ``text`` is executed in a new task
    that is awaited here.  Cancellation and errors of the new task are
    logged, not raised.
    """
    global current_command_task

    previous = current_command_task
    if previous and not previous.done():
        logger.warning("New voice command received while previous task running. Cancelling previous.")
        previous.cancel()
        try:
            # Bounded wait: give the cancellation a chance to propagate.
            await asyncio.wait_for(previous, timeout=0.5)
        except asyncio.CancelledError:
            logger.debug("Previous voice task cancelled successfully.")
        except asyncio.TimeoutError:
            logger.warning("Timeout waiting for previous voice task cancellation.")
        except Exception as e:
            logger.error(f"Error awaiting previous cancelled voice task: {e}")
        finally:
            # Drop the reference regardless of how the wait ended.
            current_command_task = None

    logger.info(f"Processing voice command: {text}")
    task = asyncio.create_task(
        _execute_command_stream(text, command_processor, "Voice")
    )
    current_command_task = task

    try:
        await task
    except asyncio.CancelledError:
        # The user-facing message comes from the interrupt handler or the task.
        logger.info("Voice command task was cancelled.")
    except Exception as e:
        logger.error(f"Error executing voice command task '{text}': {e}", exc_info=True)
        schedule_print("Error", f"Failed processing voice command: {e}")
    finally:
        # Clear the global only if the task it currently refers to is finished.
        tracked = current_command_task
        if tracked and tracked.done():
            current_command_task = None
79 |
80 | # --- Transcript Callback (for printing only) ---
def handle_transcript(text: str, source: str = "Voice"):
    """Schedule a transcript line for printing.

    Registered with the voice system as its transcript callback.

    Args:
        text: Text to display.
        source: Passed to ``schedule_print`` as the message type; defaults
            to ``"Voice"``.
    """
    schedule_print(source, text)
84 |
85 | # --- Typed Command Processing ---
async def process_typed_command(text: str, command_processor: CommandProcessor):
    """Run a command typed at the CLI prompt as the tracked command task.

    A command task that is still running is cancelled first (waiting at most
    half a second for it to finish).  The typed text is echoed, executed in a
    new task and awaited here.  Cancellation and errors of the new task are
    logged, not raised.
    """
    global current_command_task

    previous = current_command_task
    if previous and not previous.done():
        logger.warning("New typed command received while previous task running. Cancelling previous.")
        previous.cancel()
        try:
            # Bounded wait: give the cancellation a chance to propagate.
            await asyncio.wait_for(previous, timeout=0.5)
        except asyncio.CancelledError:
            logger.debug("Previous typed task cancelled successfully.")
        except asyncio.TimeoutError:
            logger.warning("Timeout waiting for previous typed task cancellation.")
        except Exception as e:
            logger.error(f"Error awaiting previous cancelled typed task: {e}")
        finally:
            # Drop the reference regardless of how the wait ended.
            current_command_task = None

    schedule_print("Typed", text)
    task = asyncio.create_task(
        _execute_command_stream(text, command_processor, "Typed")
    )
    current_command_task = task

    try:
        await task
    except asyncio.CancelledError:
        # The user-facing message comes from the interrupt handler or the task.
        logger.info("Typed command task was cancelled.")
    except Exception as e:
        logger.error(f"Error executing typed command task '{text}': {e}", exc_info=True)
        schedule_print("Error", f"Failed processing command: {e}")
    finally:
        # Clear the global only if the task it currently refers to is finished.
        tracked = current_command_task
        if tracked and tracked.done():
            current_command_task = None
123 |
124 | # --- Helper for executing commands and handling streams/speaking ---
async def _execute_command_stream(text: str, command_processor: CommandProcessor, source: str):
    """Parse ``text``, run the matching command and print/speak its results.

    The text is lowercased, trimmed and stripped of trailing ``.``/``!``/``?``
    before being parsed.  If a command keyword matches, the normalized text
    is executed; otherwise the ORIGINAL text is sent to the ``type`` command
    so capitalization and punctuation survive.  Every result is printed; it
    is additionally spoken unless the command is one of read/computer/type/
    stop or the result starts with a known status or error prefix.

    ``asyncio.CancelledError`` is re-raised for the caller; any other
    exception is logged and printed here.  ``source`` is accepted but not
    used by this function.
    """
    quiet_commands = ("read", "computer", "type", "stop")
    unspoken_prefixes = (
        "[Error:",
        "Suggested command:",
        "Attempting action:",
        "Interrupted",
        "Finished reading",
        "Typed:",
        "Read command executed.",
        "No command matched.",
    )
    try:
        # Example: "Read." -> "read", "Computer what is this?" -> "computer what is this"
        cleaned = re.sub(r'[.!?]+$', '', text.lower().strip()).strip()
        logger.debug(f"Original text: '{text}', Normalized for parsing: '{cleaned}'")

        cmd_name, args = command_processor.parse_command(cleaned)

        if cmd_name:
            # A specific command matched; the normalized text already carries its args.
            schedule_print("System", f"Executing: {cmd_name} {args if args else ''}")
            effective_cmd = cmd_name
            processor_input = cleaned
        else:
            # Nothing matched: fall back to typing the text verbatim.
            logger.info(f"No command keyword matched for normalized input: '{cleaned}'. Treating as 'type' command.")
            schedule_print("System", f"No command matched. Typing...")
            effective_cmd = "type"
            processor_input = f"type {text}"

        # Only results of the 'computer' command are labelled as LLM output.
        msg_type = "LLM" if effective_cmd == "computer" else "System"

        async for result in command_processor.process_command(processor_input):
            schedule_print(msg_type, f"{result}")  # Always print the result

            # Empty results and list/dict payloads are never spoken.
            if not result or isinstance(result, (list, dict)):
                continue
            result_str = str(result)
            if effective_cmd in quiet_commands:
                continue
            if result_str.startswith(unspoken_prefixes):
                continue
            await speak(result_str)

    except asyncio.CancelledError:
        # Log only; the caller or the interrupt handler informs the user.
        logger.info(f"Command execution for '{text}' cancelled within stream helper.")
        raise
    except Exception as e:
        # Ordinary failures end the task normally after being reported.
        logger.error(f"Error processing command '{text}' in _execute_command_stream: {e}", exc_info=True)
        schedule_print("Error", f"Failed processing command: {e}")
201 |
202 |
203 | # --- Help Text Generation ---
def generate_help_text(command_processor: CommandProcessor) -> str:
    """Build the help screen from the registered commands plus the built-in CLI commands.

    ``command_processor.get_command_details()`` is expected to provide
    ``(name, aliases, description)`` triples.  Registered commands are listed
    alphabetically with their descriptions wrapped to 70 columns, followed by
    the built-in CLI commands and the usage/hotkey notes.

    Returns:
        The complete help text as one newline-joined string.
    """
    indent = " "
    padding = 2
    desc_width = 70

    # Built-in commands handled by the CLI loop itself: (name, summary).
    cli_commands = (
        ("select model [name]", "Switch the Ollama LLM model."),
        ("refresh_models", "Reload list of available Ollama models."),
        ("help", "Shows this help message."),
        ("exit / quit", "Exits the application."),
    )

    def label(name, aliases):
        """Command name followed by its aliases in parentheses, if any."""
        return f"{name} ({', '.join(aliases)})" if aliases else name

    details = command_processor.get_command_details()

    # Width of the widest label, over registered and built-in commands alike.
    widths = [len(name) for name, _ in cli_commands]
    if details:
        widths.extend(len(label(name, aliases)) for name, aliases, _ in details)
    max_len = max(widths)
    column = max_len + len(indent) + padding

    lines = ["Available commands:"]
    for name, aliases, description in sorted(details, key=lambda entry: entry[0]):
        wrapped = textwrap.wrap(description or "No description.", width=desc_width)
        command_part = (indent + label(name, aliases)).ljust(column)
        lines.append(f" {command_part} {wrapped[0] if wrapped else ''}")
        # Continuation lines line up under the first description line.
        lines.extend(f"{indent}{' ' * column} {extra}" for extra in wrapped[1:])

    lines.append("\nOther CLI Commands:")
    lines.extend(
        f"{indent}{name.ljust(max_len + padding)} - {summary}"
        for name, summary in cli_commands
    )

    lines.append("\nUsage:")
    usage_notes = (
        "- Start input with a known command keyword (e.g., 'click OK', 'read', 'screengrab')",
        " to execute that specific command.",
        "- To query the LLM, you MUST start with the 'computer' keyword",
        " (e.g., 'computer what is the weather in Sutherlin, Oregon?').",
        "- Any input (voice or typed) that DOES NOT start with a known command keyword",
        " will be automatically TYPED out, similar to the 'type' command.",
        " Example: Saying 'hello world' will result in 'hello world' being typed.",
    )
    lines.extend(indent + note for note in usage_notes)

    lines.append(f"\n{indent}Hotkeys:")
    hotkey_notes = (
        "- Voice Activation: Press and hold Ctrl+Alt to record voice input.",
        "- Interruption: Press Ctrl+C to stop the current command or speech output.",
    )
    lines.extend(indent + note for note in hotkey_notes)

    return "\n".join(lines)
252 |
253 | # --- Dynamic Prompt Function ---
def get_dynamic_prompt() -> str:
    """Build the CLI prompt, showing the LLM model currently selected.

    Falls back to ``???`` while no computer command instance is available.
    The ``Cmd/Type`` prefix hints that unmatched input is typed out.
    """
    if computer_command_instance:
        active_model = computer_command_instance.llm_model
    else:
        active_model = "???"
    return f"Cmd/Type ({active_model})> "
259 |
260 | # --- Main Application Logic ---
async def _run_voice_cleanup(voice_system, loop) -> None:
    """Call ``voice_system.cleanup`` whether it is a coroutine function or a plain callable."""
    cleanup = voice_system.cleanup
    if asyncio.iscoroutinefunction(cleanup):
        await cleanup()
    else:
        # A synchronous cleanup may block, so keep it off the event loop thread.
        await loop.run_in_executor(None, cleanup)


async def async_main():
    """Run the interactive CLI: start-up, prompt loop and orderly shutdown.

    Start-up creates the print consumer, the command processor, the list of
    Ollama models, the voice system and the hotkey listener.  Only a failure
    to create the command processor is fatal (``sys.exit(1)``); a failing
    model fetch or voice/hotkey initialisation is reported and the CLI keeps
    running without that feature.

    The prompt loop handles ``exit``/``quit``, ``help``, ``refresh_models``
    and ``select model <name>`` itself and passes everything else to
    ``process_typed_command``.

    Shutdown cancels a still-running command task, cleans up the voice
    system and stops the print consumer.
    """
    global ollama_models_list, computer_command_instance, current_command_task

    main_event_loop = asyncio.get_running_loop()
    print_task = asyncio.create_task(print_consumer())
    # Allow print consumer to start up
    await asyncio.sleep(0.05)

    schedule_print("System", "Initializing Voice Command CLI...")

    # --- Initialize Command Processor ---
    try:
        command_processor = CommandProcessor()
        computer_command_instance = command_processor.commands.get("computer")
    except Exception as e:
        logger.critical(f"Failed to initialize CommandProcessor: {e}", exc_info=True)
        schedule_print("Error", f"CRITICAL: Failed to load commands: {e}")
        # Best effort: let the consumer flush the message above before exiting.
        await print_queue.put((None, None))
        try:
            await asyncio.wait_for(print_task, timeout=1.0)
        except Exception:
            pass
        sys.exit(1)

    # --- Initialize Ollama Models ---
    if not isinstance(computer_command_instance, ComputerCommand):
        schedule_print("Warning", "ComputerCommand module not loaded correctly. LLM features disabled.")
    else:
        schedule_print("System", "Fetching Ollama models...")
        try:
            fetched_models = await computer_command_instance.get_available_models()
            # None is treated as a failed fetch; an empty list is a successful
            # fetch that found no models (same convention as 'refresh_models').
            if fetched_models is not None:
                ollama_models_list = fetched_models
                ollama_models_for_completion[:] = ollama_models_list
                if ollama_models_list:
                    # Keep the current model if it exists, else use the first available.
                    current_model = computer_command_instance.llm_model
                    if current_model not in ollama_models_list:
                        new_model = ollama_models_list[0]
                        computer_command_instance.set_llm_model(new_model)
                        schedule_print("System", f"Default model '{current_model}' not found. Switched to: {new_model}")
                    else:
                        schedule_print("System", f"Ollama models loaded. Using: {current_model}")
                else:
                    schedule_print("Warning", "No Ollama models found. LLM commands may fail.")
                    computer_command_instance.set_llm_model("mistral")  # Keep fallback
            else:
                schedule_print("Warning", f"Could not fetch Ollama models (API error?). Using default: {ollama_models_list[0]}")
                ollama_models_for_completion[:] = ollama_models_list  # Update completer with default
                computer_command_instance.set_llm_model(ollama_models_list[0])  # Ensure default is set
        except Exception as e:
            schedule_print("Error", f"Failed during Ollama model fetch: {e}. Using default.")
            ollama_models_for_completion[:] = ollama_models_list  # Update completer with default
            # The list may already have been replaced by an empty one, so do
            # not index it blindly.
            fallback_model = ollama_models_list[0] if ollama_models_list else "mistral"
            if computer_command_instance:
                computer_command_instance.set_llm_model(fallback_model)

    # --- Initialize Voice System & Hotkey ---
    voice_system = None
    listener_thread = None

    async def trigger_command_processing(transcribed_text: str):
        """Callback from the voice system: process a final transcription."""
        await handle_voice_command(transcribed_text, command_processor)

    try:
        voice_system = VoiceCommandSystem(
            loop=main_event_loop,
            speak_func=speak,
            command_trigger_func=trigger_command_processing,
        )
        # Set the callback for printing transcripts/status
        voice_system.set_transcript_callback(handle_transcript)
        schedule_print("System", "Voice system initialized.")

        # The listener gets the accessor so it can look up the running task.
        listener_thread = hotkey_listener.start_listener(
            main_event_loop,
            voice_system,
            schedule_print,
            get_current_task,
        )
        if not listener_thread:
            raise RuntimeError("Hotkey listener failed to start.")

    except Exception as e:
        logger.error(f"Failed to initialize VoiceCommandSystem or Hotkey Listener: {e}", exc_info=True)
        schedule_print("Error", "Failed to initialize voice system or hotkey. Voice commands/hotkeys disabled.")
        if voice_system:  # Attempt cleanup if voice system partially initialized
            try:
                await _run_voice_cleanup(voice_system, main_event_loop)
            except Exception as cleanup_e:
                logger.error(f"Error during voice system cleanup after init failure: {cleanup_e}")
        voice_system = None  # Ensure voice_system is None if init fails

    # --- Setup Prompt Session ---
    cli_completer = CLICompleter(command_processor)
    session = PromptSession(
        get_dynamic_prompt,  # Dynamic prompt function
        completer=cli_completer,
        complete_while_typing=True,
    )

    schedule_print("System", f"CLI Ready. Type 'help' for commands or use hotkeys.")

    # --- Main Input Loop ---
    while True:
        input_text = ""  # Defined up front so the generic handler below can log it
        try:
            # patch_stdout keeps the prompt intact while async prints happen.
            with patch_stdout():
                input_text = await session.prompt_async()

            input_text = input_text.strip()
            if not input_text:
                continue  # Ignore empty input

            lowered = input_text.lower()

            # --- Handle Special CLI Commands ---
            if lowered in ["exit", "quit"]:
                schedule_print("System", "Exiting...")
                break

            elif lowered == "help":
                help_content = generate_help_text(command_processor)
                # Use safe_print directly for potentially long help text with formatting
                await safe_print(help_content)
                continue

            elif lowered == "refresh_models":
                if computer_command_instance:
                    schedule_print("System", "Refreshing Ollama models...")
                    try:
                        fetched_models = await computer_command_instance.get_available_models()
                        if fetched_models is not None:
                            ollama_models_list = fetched_models
                            ollama_models_for_completion[:] = ollama_models_list
                            if not ollama_models_list:
                                schedule_print("Warning", "No Ollama models found after refresh.")
                                if computer_command_instance.llm_model not in ollama_models_list:
                                    computer_command_instance.set_llm_model("mistral")  # Fallback
                                    schedule_print("System", "Model reset to fallback 'mistral'.")
                            elif computer_command_instance.llm_model not in ollama_models_list:
                                new_model = ollama_models_list[0]  # Use first available
                                computer_command_instance.set_llm_model(new_model)
                                schedule_print("System", f"Models refreshed: {ollama_models_list}. Current model reset to {new_model}")
                            else:
                                schedule_print("System", f"Models refreshed: {ollama_models_list}")
                        else:
                            schedule_print("Error", "Failed to fetch models (API error or connection issue).")
                    except Exception as e:
                        schedule_print("Error", f"Failed to refresh models: {e}")
                else:
                    schedule_print("Error", "Computer command module unavailable.")
                continue

            # Also match a bare "select model" so a missing name yields the
            # usage message instead of being typed out as ordinary text.
            elif lowered == "select model" or lowered.startswith("select model "):
                parts = input_text.split(maxsplit=2)
                if len(parts) == 3:
                    model_name = parts[2]
                    if ollama_models_list and model_name in ollama_models_list:
                        if computer_command_instance:
                            computer_command_instance.set_llm_model(model_name)
                            schedule_print("System", f"LLM model set to: {model_name}")
                        else:
                            schedule_print("Error", "Computer command module unavailable.")
                    elif not ollama_models_list:
                        schedule_print("Error", "No models available to select.")
                    else:
                        available_models_str = ', '.join(ollama_models_list)
                        schedule_print("Error", f"Model '{model_name}' not found. Available: {available_models_str}")
                else:
                    schedule_print("Error", "Usage: select model <name>")
                continue

            # --- Process Regular Commands / Default Typing ---
            await process_typed_command(input_text, command_processor)

        except KeyboardInterrupt:
            # Ctrl+C pressed *at the prompt*: cancel a running task, otherwise
            # just redraw the prompt.
            if current_command_task and not current_command_task.done():
                logger.debug("Ctrl+C at prompt: Cancelling active task.")
                current_command_task.cancel()
            continue
        except EOFError:
            # Exit gracefully on Ctrl+D
            schedule_print("System", "EOF received. Exiting...")
            break
        except Exception as e:
            logger.error(f"Error in main loop processing input '{input_text}': {e}", exc_info=True)
            schedule_print("Error", f"An unexpected error occurred: {e}")
            # Prevent rapid error loops if prompt_async fails repeatedly
            await asyncio.sleep(0.1)

    # --- Application Cleanup ---
    schedule_print("System", "Shutting down...")

    # --- Cancel any lingering task ---
    if current_command_task and not current_command_task.done():
        logger.info("Shutting down: Cancelling active command task.")
        current_command_task.cancel()
        try:
            await asyncio.wait_for(current_command_task, timeout=1.0)  # Wait briefly
        except asyncio.CancelledError:
            pass  # Expected
        except asyncio.TimeoutError:
            logger.warning("Timeout waiting for final task cancellation.")
        except Exception as e:
            logger.error(f"Error awaiting final task cancellation: {e}")

    # --- Voice System Cleanup ---
    if voice_system and hasattr(voice_system, 'cleanup'):
        logger.info("Calling voice system cleanup...")
        try:
            await _run_voice_cleanup(voice_system, main_event_loop)
            logger.info("Voice system cleaned up.")
        except Exception as e:
            logger.error(f"Error during voice system cleanup: {e}", exc_info=True)

    # --- Print Consumer Cleanup ---
    logger.info("Stopping print consumer...")
    try:
        await print_queue.put((None, None))  # Send sentinel
        # Wait briefly for the consumer to process the sentinel
        await asyncio.wait_for(print_task, timeout=2.0)
        logger.info("Print consumer stopped.")
    except asyncio.TimeoutError:
        logger.warning("Print consumer task did not finish promptly. Cancelling.")
        print_task.cancel()
        try:
            await print_task  # Allow cancellation to be processed
        except asyncio.CancelledError:
            pass  # Expected
    except Exception as e:
        logger.error(f"Error stopping print consumer: {e}")

    # Listener thread is daemon, will exit when main thread exits.
513 |
514 | # --- Main execution block ---
if __name__ == "__main__":
    # Script entry point: snapshot the terminal settings, run the async CLI,
    # restore the terminal afterwards, and exit with a status that reflects
    # whether the run failed.
    import os  # NOTE(review): not referenced in this block; kept in case other module code relies on this binding

    # Exit status reported to the shell: stays 0 for a normal run or a
    # Ctrl+C, becomes 1 when an unexpected exception escapes the run.
    exit_code = 0
    try:
        # Output of `stty -g` (a restorable settings string), or None when
        # stdin is not a terminal or the settings could not be read.
        original_stty = None
        if sys.stdin.isatty():
            try:
                stty_process = subprocess.run(
                    ['stty', '-g'], capture_output=True, text=True, check=False
                )
            except FileNotFoundError:
                logger.debug("'stty' command not found, cannot save terminal settings.")
            except Exception as e:  # Any other failure: run without a snapshot
                logger.warning(f"Could not get terminal settings via stty: {e}")
            else:
                if stty_process.returncode == 0:
                    original_stty = stty_process.stdout.strip()
                else:
                    logger.debug(f"stty -g failed: {stty_process.stderr}")

        try:
            asyncio.run(async_main())
        finally:
            # Restore the terminal even when async_main() raised.
            if original_stty and sys.stdin.isatty():
                logger.debug(f"Restoring stty settings: {original_stty}")
                try:
                    # stderr is captured so a failure can be logged with the
                    # actual message; without this, `.stderr` is always None.
                    restore_process = subprocess.run(
                        ['stty', original_stty],
                        stderr=subprocess.PIPE,
                        text=True,
                        check=False,
                    )
                    if restore_process.returncode != 0:
                        logger.warning(f"Failed to restore stty settings: {restore_process.stderr}")
                except FileNotFoundError:
                    logger.warning("Cannot restore terminal settings: 'stty' not found.")
                except Exception as e:
                    logger.error(f"Error restoring stty settings: {e}")

    except KeyboardInterrupt:
        # Ctrl+C that arrives before the asyncio loop starts or after it exits.
        logger.info("Application interrupted by user (Ctrl+C outside main loop).")
    except Exception as e:
        # Failure outside the main async loop: log it and also write to
        # stderr in case logging is not fully configured.
        logging.critical(f"Application failed to run: {e}", exc_info=True)
        print(f"\n[CRITICAL ERROR] Application failed: {e}", file=sys.stderr)
        exit_code = 1
    finally:
        # Farewell message is printed on every exit path.
        print("\nVoice Command CLI exited.")

    # Exit *after* the try/finally. Calling sys.exit(0) inside `finally`
    # would replace an in-flight SystemExit(1) and make failures look like
    # successful runs to the calling shell.
    sys.exit(exit_code)
565 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------