├── cli ├── __init__.py ├── completer.py └── output.py ├── core ├── __init__.py └── voice_system.py ├── .gitignore ├── commands ├── __init__.py ├── type_command.py ├── base.py ├── stop_command.py ├── click_command.py ├── read_command.py ├── command_processor.py ├── scrap_command.py └── computer_command.py ├── requirements.txt ├── README.md ├── audio └── vad.py ├── print_project.py ├── hotkey_listener.py ├── speech └── whisper_processor.py ├── main.py └── LICENSE /cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core package for voice command system.""" 2 | 3 | from .voice_system import VoiceCommandSystem 4 | 5 | __all__ = ['VoiceCommandSystem'] 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | pyenv 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | *.so 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Virtual Environment 25 | venv/ 26 | ENV/ 27 | env/ 28 | .env 29 | 30 | # IDE 31 | .idea/ 32 | .vscode/ 33 | *.swp 34 | *.swo 35 | *.swn 36 | .*.kate-swp 37 | 38 | # OS 39 | .DS_Store 40 | Thumbs.db 41 | -------------------------------------------------------------------------------- /commands/__init__.py: -------------------------------------------------------------------------------- 1 | # commands/__init__.py 2 | import pkgutil 3 | import inspect 4 | import importlib 5 | from .base import Command 6 | 7 | __all__ = ['Command'] 8 | 9 | # Discover and export all Command subclasses 10 | for _, module_name, _ in pkgutil.iter_modules(__path__): 11 | if module_name not in ['__init__', 'base']: 12 | # Dynamically import the module 13 | module = importlib.import_module(f".{module_name}", package=__name__) 14 | # Find all Command subclasses in the module 15 | for name, obj in inspect.getmembers(module): 16 | if inspect.isclass(obj) and issubclass(obj, Command) and obj is not Command: 17 | __all__.append(name) 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core ML/AI Dependencies 2 | nemo_toolkit[asr]==2.2.1 3 | torch==2.6.0 4 | onnx==1.17.0 5 | transformers==4.48.3 6 | pytorch-lightning==2.5.1.post0 7 | cuda-python>=12.3 8 | 9 | # Audio Processing 10 | PyAudio==0.2.12 11 | webrtcvad-wheels==2.0.14 12 | soundfile==0.13.1 13 | pydub==0.25.1 14 | sox==1.5.0 15 | resampy==0.4.3 16 | # Note: libsora package removed - not available on PyPI 17 | 18 | # Screen/GUI Interaction 19 | PyAutoGUI==0.9.54 20 | pynput==1.7.7 21 | pytesseract==0.3.13 22 | pillow==11.1.0 23 | python3-Xlib==0.15 24 | 25 | # CLI and Output 26 | prompt_toolkit==3.0.51 27 | # espeak and xclip are system packages, not pip packages 28 | 29 | # Core dependencies 30 | numpy<2.0 31 | sentencepiece==0.2.0 32 | protobuf==3.20.3 33 | accelerate==1.3.0 34 | datasets==3.2.0 35 | safetensors==0.5.2 36 | sounddevice==0.5.1 37 | evdev==1.9.2 38 | pycairo==1.28.0 39 | texterrors==0.5.1 40 | kaldi-python-io==1.2.2 41 | wget==3.2 42 | 43 | # Additional dependencies that might be needed 44 | requests>=2.28.0 45 | aiohttp>=3.8.0 46 | -------------------------------------------------------------------------------- /commands/type_command.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from .base import Command 3 | 4 | class TypeCommand(Command): 5 | def __init__(self): 6 | super().__init__( 7 | name="type", 8 | aliases=["type in"], 9 | description="Type text using keyboard", 10 | execute=self._execute 11 | ) 12 | 13 | async def _execute(self, text: str) -> str: 14 | """Handle type commands by using xdotool to type text.""" 15 | try: 16 | # Capitalize first letter if original command was capitalized 17 | if text and not text[0].isupper(): 18 | text = text[0].upper() + text[1:] 19 | 20 | print(f"Typing text: '{text}'") 21 | subprocess.run(['xdotool', 'type', text], check=True) 22 | return f"Typed: '{text}'" 23 | except subprocess.CalledProcessError as e: 24 | error_msg = f"Type command failed: {str(e)}" 25 | print(error_msg) 26 | return error_msg 27 | except Exception as e: 28 | error_msg = f"Unexpected typing error: {str(e)}" 29 | print(error_msg) 30 | return error_msg 31 | -------------------------------------------------------------------------------- /commands/base.py: -------------------------------------------------------------------------------- 1 | # commands/base.py 2 | from dataclasses import dataclass, field 3 | from typing import Dict, Any, Callable, Optional, Awaitable # Added Awaitable 4 | from contextlib import asynccontextmanager 5 | 6 | @dataclass(frozen=True) # Keep frozen for simplicity unless state needs mutation often 7 | class Command: 8 | """Base class for all commands.""" 9 | name: str 10 | aliases: list[str] 11 | description: str 12 | # <<< Updated signature: Now expects an async function that might not return anything significant >>> 13 | execute: Callable[[str], Awaitable[None]] 14 | # Flag to indicate this command should only match single-word inputs (no arguments) 15 | single_word_only: bool = False 16 | # State might still be useful for complex, long-running commands, but less so now 17 | state: Dict[str, bool] = field(default_factory=lambda: {'is_running': False}) 18 | 19 | @property 20 | def is_active(self) -> bool: 21 | """Check if the command is currently running (basic state check).""" 22 | return self.state['is_running'] 23 | 24 | @asynccontextmanager 25 | async def running(self): 26 | """Context manager for command execution state (optional use).""" 27 | # This might be less necessary if commands are simpler now, but keep for potential use 28 | self.state['is_running'] = True 29 | try: 30 | yield 31 | finally: 32 | self.state['is_running'] = False 33 | -------------------------------------------------------------------------------- /commands/stop_command.py: -------------------------------------------------------------------------------- 1 | # commands/stop_command.py 2 | import subprocess 3 | import logging 4 | from .base import Command 5 | # <<< Import output functions >>> 6 | from cli.output import schedule_print # Only need print for this command 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | class StopCommand(Command): 11 | def __init__(self): 12 | super().__init__( 13 | name="stop", 14 | aliases=["cancel", "shutup", "silence"], 15 | description="Stops any active text-to-speech feedback (espeak).", 16 | # <<< Reference the updated _execute >>> 17 | execute=self._execute 18 | ) 19 | 20 | # <<< Updated signature and implementation >>> 21 | async def _execute(self, args: str) -> None: 22 | """ 23 | Executes the stop command by killing espeak processes. 24 | Prints status to CLI, does not speak. Args are ignored. 25 | """ 26 | logger.info("Executing stop command...") 27 | try: 28 | result = subprocess.run(['pkill', '-f', 'espeak'], capture_output=True, check=False) 29 | 30 | if result.returncode == 0: 31 | msg = "Stopped active speech." 32 | logger.info(msg) 33 | schedule_print("System", msg) # <<< Explicitly print 34 | elif result.returncode == 1: 35 | msg = "No active speech found to stop." 36 | logger.info(msg) 37 | schedule_print("System", msg) # <<< Explicitly print 38 | else: 39 | error_msg = f"pkill command failed with code {result.returncode}: {result.stderr.decode('utf-8', errors='ignore').strip()}" 40 | logger.error(error_msg) 41 | schedule_print("Error", f"Error trying to stop speech: {error_msg}") # <<< Explicitly print error 42 | 43 | except FileNotFoundError: 44 | error_msg = "Error: 'pkill' command not found. Cannot stop speech." 45 | logger.error(error_msg) 46 | schedule_print("Error", error_msg) # <<< Explicitly print error 47 | except Exception as e: 48 | error_msg = f"Unexpected error stopping speech: {str(e)}" 49 | logger.error(error_msg, exc_info=True) 50 | schedule_print("Error", error_msg) # <<< Explicitly print error 51 | # No return value needed now 52 | -------------------------------------------------------------------------------- /cli/completer.py: -------------------------------------------------------------------------------- 1 | # cli/completer.py 2 | from prompt_toolkit.completion import Completer, Completion 3 | from typing import Iterable, List, Set 4 | 5 | # This list will be updated by main.py 6 | ollama_models_for_completion: List[str] = ["mistral"] 7 | 8 | class CLICompleter(Completer): 9 | def __init__(self, command_processor): 10 | """ 11 | Initialize the completer. 12 | Args: 13 | command_processor: The initialized CommandProcessor instance. 14 | """ 15 | self.command_processor = command_processor 16 | # <<< FIX: Define static_keywords BEFORE calling _update_command_triggers >>> 17 | self.static_keywords = sorted(["select", "help", "exit", "quit", "refresh_models"]) 18 | self.select_options = ["model"] 19 | # Now call update, which uses self.static_keywords 20 | self._update_command_triggers() # Initial fetch 21 | 22 | def _update_command_triggers(self): 23 | """Updates the list of command names and aliases from the processor.""" 24 | self.command_triggers: Set[str] = set() 25 | if self.command_processor: 26 | self.command_triggers.update(self.command_processor.commands.keys()) 27 | for command in self.command_processor.commands.values(): 28 | self.command_triggers.update(command.aliases) 29 | # Use the now defined self.static_keywords 30 | self.all_triggers = sorted(list(self.command_triggers.union(self.static_keywords))) 31 | # print(f"Completer updated triggers: {self.all_triggers}") # Debug 32 | 33 | # --- get_completions method remains the same --- 34 | def get_completions(self, document, complete_event): 35 | # (Previous implementation) 36 | text = document.text_before_cursor.lstrip() 37 | words = text.split() 38 | word_before_cursor = document.get_word_before_cursor(WORD=True) 39 | 40 | try: 41 | if not text or ' ' not in text: # Top Level Completion 42 | for trigger in self.all_triggers: 43 | if trigger.startswith(word_before_cursor): 44 | yield Completion(trigger, start_position=-len(word_before_cursor)) 45 | return 46 | if len(words) >= 1: # Contextual Completion 47 | first_word = words[0] 48 | if first_word == "select": # 'select' command completion 49 | if len(words) == 1 and text.endswith(' '): 50 | for opt in self.select_options: yield Completion(opt, start_position=0) 51 | elif len(words) == 2 and not text.endswith(' '): # Typing 'model' 52 | if self.select_options[0].startswith(word_before_cursor): yield Completion(self.select_options[0], start_position=-len(word_before_cursor)) 53 | elif len(words) == 2 and words[1] == "model" and text.endswith(' '): # After 'select model ' 54 | for model in ollama_models_for_completion: yield Completion(model, start_position=0) 55 | elif len(words) >= 3 and words[1] == "model": # Typing model name 56 | for model in ollama_models_for_completion: 57 | if model.startswith(word_before_cursor): yield Completion(model, start_position=-len(word_before_cursor)) 58 | return 59 | except Exception: pass # Avoid completer errors crashing app 60 | -------------------------------------------------------------------------------- /commands/click_command.py: -------------------------------------------------------------------------------- 1 | import pyautogui 2 | import pytesseract 3 | from .base import Command 4 | 5 | class ClickCommand(Command): 6 | def __init__(self): 7 | super().__init__( 8 | name="click", 9 | aliases=[], 10 | description="Click text or buttons on screen", 11 | execute=self._execute 12 | ) 13 | 14 | async def _execute(self, text: str) -> str: 15 | """Handle click commands by finding and clicking matching text on screen.""" 16 | try: 17 | print(f"Searching for text: '{text}'") 18 | screenshot = pyautogui.screenshot() 19 | 20 | # Configure Tesseract for better accuracy 21 | custom_config = '--psm 11 --oem 3' 22 | ocr_data = pytesseract.image_to_data( 23 | screenshot, 24 | output_type=pytesseract.Output.DICT, 25 | config=custom_config 26 | ) 27 | 28 | # Debug OCR results 29 | print("\nOCR Results:") 30 | found_words = [] 31 | for i, word in enumerate(ocr_data['text']): 32 | if word.strip(): 33 | conf = float(ocr_data['conf'][i]) 34 | found_words.append(f"'{word}' (confidence: {conf:.1f}%)") 35 | print("Detected words:", ", ".join(found_words[:10]) + "..." if len(found_words) > 10 else ", ".join(found_words)) 36 | 37 | best_match = None 38 | highest_confidence = 0 39 | search_text = text.lower() 40 | 41 | for i, word in enumerate(ocr_data['text']): 42 | if not word.strip(): 43 | continue 44 | 45 | word_lower = word.strip().lower() 46 | confidence = float(ocr_data['conf'][i]) 47 | 48 | # Various matching strategies 49 | matched = False 50 | match_type = None 51 | 52 | if search_text == word_lower: 53 | matched = True 54 | match_type = "exact" 55 | confidence *= 1.2 56 | elif search_text in word_lower: 57 | matched = True 58 | match_type = "contains" 59 | elif word_lower in search_text: 60 | matched = True 61 | match_type = "partial" 62 | confidence *= 0.8 63 | 64 | if matched and confidence > highest_confidence: 65 | highest_confidence = confidence 66 | x = ocr_data['left'][i] + ocr_data['width'][i] // 2 67 | y = ocr_data['top'][i] + ocr_data['height'][i] // 2 68 | best_match = (x, y, word, match_type, confidence) 69 | 70 | if best_match: 71 | x, y, matched_word, match_type, conf = best_match 72 | print(f"\nBest match: '{matched_word}' ({match_type} match, confidence: {conf:.1f}%)") 73 | print(f"Clicking at position: ({x}, {y})") 74 | 75 | pyautogui.moveTo(x, y, duration=0.2) 76 | pyautogui.click() 77 | 78 | return f"Clicked '{matched_word}' at ({x}, {y})" 79 | 80 | print("\nNo matching text found on screen") 81 | return "Text not found on screen" 82 | 83 | except Exception as e: 84 | error_msg = f"Click command failed: {str(e)}" 85 | print(error_msg) 86 | return error_msg 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Voice Command System 2 | 3 | Control your computer with a command-line voice interface. Uses NVIDIA's Parakeet-TDT model for speech recognition and supports clicking UI elements, typing text, reading text aloud, interacting with a local LLM, screen capture with OCR, and more. 4 | 5 | ## Features 6 | 7 | * Speech recognition using **NVIDIA Parakeet-TDT 0.6B V2 via NeMo toolkit**, providing accurate transcription with punctuation and capitalization. 8 | * Click commands: Find and click text/buttons on screen using OCR. 9 | * Type commands: Type text using keyboard emulation. 10 | * Read commands: Read highlighted text aloud using text-to-speech. 11 | * Computer commands: Interact with your system (run shell commands, manage apps/windows, query about highlighted text) using a local LLM (Ollama). 12 | * scrap command: Select a screen area, perform OCR, and copy the extracted text. 13 | * Stop command: Immediately halts any active text-to-speech playback. 14 | * Rolling buffer: Captures audio just before hotkey activation to avoid missed words. 15 | * Hotkey controls: Use keyboard shortcuts to trigger recording and interrupt actions. 16 | 17 | ## Installation on openSUSE Tumbleweed 18 | 19 | Follow these steps precisely to set up the project environment. 20 | 21 | ### Step 1: Install System Dependencies 22 | 23 | First, install `pyenv` for managing Python versions. Follow the official `pyenv` installation instructions. After that, install the necessary system packages for both building Python and running the application using `zypper`: 24 | 25 | ```bash 26 | sudo zypper install git-core gcc automake make zlib-devel libbz2-devel libopenssl-devel readline-devel sqlite3-devel xz-devel libffi-devel tk-devel xdotool espeak xclip tesseract-ocr pkill wmctrl ffmpeg gnome-screenshot 27 | ``` 28 | 29 | ### Step 2: Install Correct Python Version 30 | 31 | The heavy dependencies like nemo_toolkit require a specific Python version for which pre-compiled packages (wheels) are available. We will use pyenv to install Python 3.11. 32 | 33 | ```bash 34 | # Install Python 3.11.10 (or latest 3.11.x) 35 | pyenv install 3.11.10 36 | 37 | # Create a dedicated virtual environment for the project 38 | pyenv virtualenv 3.11.10 voice-command-311 39 | ``` 40 | 41 | ### Step 3: Set Up Project and Install Python Packages 42 | 43 | Now, clone the repository and use the pyenv virtual environment you just created. 44 | 45 | ```bash 46 | # Clone the repository (if you haven't already) 47 | git clone https://github.com/ruapotato/Voice-Command 48 | cd Voice-Command 49 | 50 | # Set the local python version for this directory 51 | pyenv local voice-command-311 52 | 53 | # Upgrade pip and install the required packages 54 | pip install --upgrade pip 55 | pip install -r requirements.txt 56 | ``` 57 | 58 | ### Step 4: Local LLM Setup 59 | 60 | This project uses Ollama for the computer command. 61 | 62 | 1. Install Ollama from [ollama.com](https://ollama.com). 63 | 2. Pull your desired model. For example: 64 | 65 | ```bash 66 | ollama pull mistral 67 | ``` 68 | 69 | ## Running the Application 70 | 71 | 1. **Ensure Ollama is running**: Before starting the app, make sure the Ollama service is active in the background if you intend to use the computer command. 72 | 73 | ```bash 74 | ollama serve 75 | ``` 76 | 77 | 2. **Navigate and Run**: Open a new terminal and go to the project directory. The pyenv environment should activate automatically. Then, run the main script. 78 | 79 | ```bash 80 | cd /path/to/Voice-Command 81 | python main.py 82 | ``` 83 | 84 | *Note: The first time you run it, NeMo will download the Parakeet model, which may take some time.* 85 | 86 | ## Keyboard Controls 87 | 88 | * **Record Voice**: Press and hold `Ctrl+Shift` 89 | * **Interrupt/Stop**: Press `Ctrl+C` 90 | * **Exit**: Type `exit` or `quit` at the prompt, or press `Ctrl+D` 91 | 92 | ## License 93 | 94 | GPL3 by David Hamner 95 | -------------------------------------------------------------------------------- /cli/output.py: -------------------------------------------------------------------------------- 1 | # cli/output.py 2 | import asyncio 3 | from prompt_toolkit import print_formatted_text, HTML 4 | import subprocess 5 | import logging 6 | import re # Keep for potential future use? Or remove. 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | print_queue = asyncio.Queue() 11 | ESPEAK_CONFIG = [] # Voice config 12 | 13 | # --- Speak Utility (using async subprocess for proper interruption) --- 14 | 15 | async def speak(text: str): 16 | """ 17 | Asynchronously speaks text using espeak with proper cancellation support. 18 | The caller is responsible for deciding IF text should be spoken. 19 | """ 20 | if not text: # Only proceed if text is not empty 21 | return 22 | 23 | logger.debug(f"Attempting to speak: {text[:50]}...") 24 | # Basic cleaning for shell safety 25 | text_to_speak = text.replace('`', '').replace('"', "'").replace(';', '.') 26 | if not text_to_speak: 27 | return # Check again after cleaning 28 | 29 | try: 30 | # Kill any existing espeak processes first 31 | subprocess.run(['pkill', '-f', 'espeak'], check=False, capture_output=True) 32 | 33 | # Use async subprocess for proper cancellation support 34 | process = await asyncio.create_subprocess_exec( 35 | 'espeak', 36 | *ESPEAK_CONFIG, 37 | text_to_speak, 38 | stdout=asyncio.subprocess.DEVNULL, 39 | stderr=asyncio.subprocess.DEVNULL 40 | ) 41 | 42 | # Wait for completion - this will properly respond to task cancellation 43 | try: 44 | await asyncio.wait_for(process.wait(), timeout=20) 45 | except asyncio.TimeoutError: 46 | logger.warning(f"espeak command timed out for: {text_to_speak[:50]}...") 47 | # Kill the process if it times out 48 | process.kill() 49 | await process.wait() 50 | 51 | except asyncio.CancelledError: 52 | logger.debug("Speech was cancelled by user interrupt.") 53 | # Kill any espeak processes 54 | subprocess.run(['pkill', '-f', 'espeak'], check=False) 55 | raise # Re-raise to propagate cancellation 56 | except FileNotFoundError: 57 | logger.error("espeak command not found. Cannot speak.") 58 | except Exception as e: 59 | logger.error(f"Speech failed for '{text_to_speak[:50]}...': {e}") 60 | 61 | # --- Print Queue Logic (remains same) --- 62 | async def safe_print(formatted_message: str): 63 | """Asynchronously prints pre-formatted HTML messages without disrupting the prompt.""" 64 | print_formatted_text(HTML(formatted_message)) 65 | 66 | async def print_consumer(): 67 | """Consumes messages from the print_queue and prints them safely.""" 68 | while True: 69 | message_type, message = await print_queue.get() 70 | if message_type is None: print_queue.task_done(); break # Sentinel 71 | 72 | prefix = f"[{message_type}]" 73 | # Apply colors based on type 74 | if message_type == "Voice": formatted_message = f"{prefix} {message}" 75 | elif message_type == "System": formatted_message = f"{prefix} {message}" 76 | elif message_type == "LLM": formatted_message = f"{prefix} {message}" 77 | elif message_type == "Error": formatted_message = f"{prefix} {message}" 78 | elif message_type == "Help": formatted_message = f"{prefix}\n{message}" 79 | elif message_type == "Typed": formatted_message = f"{prefix} {message}" 80 | else: formatted_message = f"{prefix} {message}" # Default 81 | 82 | await safe_print(formatted_message) 83 | print_queue.task_done() 84 | 85 | def schedule_print(message_type: str, message: str): 86 | """Puts a message onto the print queue from any thread.""" 87 | # Ensure message is a string 88 | message_str = str(message) if message is not None else "" 89 | try: 90 | loop = asyncio.get_running_loop() 91 | loop.call_soon_threadsafe(print_queue.put_nowait, (message_type, message_str)) 92 | except RuntimeError: 93 | # Fallback if called before loop is running or from non-async context without loop access 94 | print(f"[Fallback Print {message_type}] {message_str}") 95 | -------------------------------------------------------------------------------- /audio/vad.py: -------------------------------------------------------------------------------- 1 | """Whisper-based speech recognition.""" 2 | import torch 3 | from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline 4 | import logging 5 | import warnings 6 | import numpy as np 7 | 8 | logging.getLogger("transformers").setLevel(logging.ERROR) 9 | warnings.filterwarnings("ignore", category=FutureWarning) 10 | 11 | class WhisperProcessor: 12 | def __init__(self): 13 | print("Initializing Whisper processor...") 14 | self.setup_model() 15 | 16 | def setup_model(self): 17 | """Initialize the Whisper model and pipeline.""" 18 | try: 19 | # Setup device 20 | self.device = "cuda:0" if torch.cuda.is_available() else "cpu" 21 | self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 22 | print(f"Using device: {self.device}") 23 | 24 | # Load model 25 | model_id = "openai/whisper-large-v3" # Changed from turbo version 26 | model = AutoModelForSpeechSeq2Seq.from_pretrained( 27 | model_id, 28 | torch_dtype=self.torch_dtype, 29 | low_cpu_mem_usage=True, 30 | use_safetensors=True 31 | ) 32 | model.to(self.device) 33 | 34 | # Load processor 35 | processor = AutoProcessor.from_pretrained(model_id) 36 | 37 | # Setup pipeline with adjusted parameters 38 | self.pipe = pipeline( 39 | "automatic-speech-recognition", 40 | model=model, 41 | tokenizer=processor.tokenizer, 42 | feature_extractor=processor.feature_extractor, 43 | torch_dtype=self.torch_dtype, 44 | device=self.device, 45 | model_kwargs={ 46 | "language": "en", 47 | "task": "transcribe", 48 | "use_auth_token": None, 49 | "return_timestamps": False 50 | }, 51 | chunk_length_s=30, 52 | stride_length_s=5, 53 | batch_size=1, 54 | ignore_warning=True 55 | ) 56 | print("Whisper model initialized") 57 | 58 | except Exception as e: 59 | print(f"Error initializing Whisper: {e}") 60 | raise 61 | 62 | def _preprocess_audio(self, audio_data): 63 | """Preprocess audio data for Whisper.""" 64 | try: 65 | # Debug original audio 66 | print(f"Input audio - Shape: {audio_data.shape}, Type: {audio_data.dtype}, Range: [{audio_data.min()}, {audio_data.max()}]") 67 | 68 | # Ensure data is in float32 69 | audio_float = audio_data.astype(np.float32) 70 | 71 | # Apply pre-emphasis filter 72 | pre_emphasis = 0.97 73 | emphasized_audio = np.append( 74 | audio_float[0], 75 | audio_float[1:] - pre_emphasis * audio_float[:-1] 76 | ) 77 | 78 | # Normalize using RMS normalization 79 | rms = np.sqrt(np.mean(np.square(emphasized_audio))) 80 | if rms > 0: 81 | normalized_audio = emphasized_audio / rms 82 | else: 83 | normalized_audio = emphasized_audio 84 | 85 | # Clip to prevent extreme values 86 | normalized_audio = np.clip(normalized_audio, -1.0, 1.0) 87 | 88 | # Debug processed audio 89 | print(f"Processed audio - Shape: {normalized_audio.shape}, Range: [{normalized_audio.min():.3f}, {normalized_audio.max():.3f}]") 90 | 91 | return normalized_audio 92 | 93 | except Exception as e: 94 | print(f"Error preprocessing audio: {e}") 95 | return None 96 | 97 | async def transcribe(self, audio_data): 98 | """Process audio data and return transcribed text.""" 99 | try: 100 | if audio_data is None: 101 | print("Received empty audio data") 102 | return None 103 | 104 | # Preprocess audio 105 | audio_processed = self._preprocess_audio(audio_data) 106 | if audio_processed is None: 107 | return None 108 | 109 | # Process with adjusted parameters 110 | inputs = { 111 | "raw": audio_processed, 112 | "sampling_rate": 16000 113 | } 114 | 115 | print("Processing audio segment...") 116 | result = self.pipe( 117 | inputs, 118 | batch_size=1, 119 | generate_kwargs={ 120 | "temperature": 0, # Deterministic decoding 121 | "compression_ratio_threshold": 2.4, 122 | "logprob_threshold": -1.0, 123 | "no_speech_threshold": 0.6 124 | } 125 | ) 126 | 127 | transcribed_text = result["text"].strip() 128 | print(f"Transcribed: {transcribed_text}") 129 | return transcribed_text 130 | 131 | except Exception as e: 132 | print(f"Error processing audio: {e}") 133 | return None 134 | -------------------------------------------------------------------------------- /commands/read_command.py: -------------------------------------------------------------------------------- 1 | # commands/read_command.py 2 | import subprocess 3 | import asyncio 4 | import logging 5 | from typing import Optional 6 | from .base import Command 7 | import re 8 | 9 | # Add logger instance 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class ReadCommand(Command): 14 | def __init__(self): 15 | super().__init__( 16 | name="read", 17 | aliases=["reed", "red", "three"], # Consider removing "three" if it's a misrecognition 18 | description="Read highlighted text aloud", 19 | execute=self._execute, 20 | single_word_only=True # Only match single-word "read", not "read the readme..." 21 | ) 22 | self.espeak_config = [] 23 | # Check if espeak exists on init 24 | try: 25 | subprocess.run(['which', 'espeak'], check=True, capture_output=True) 26 | except (subprocess.CalledProcessError, FileNotFoundError): 27 | logger.error("'espeak' command not found. Read command will not work.") 28 | 29 | async def _execute(self, text: str) -> Optional[str]: 30 | """Handle read command by reading highlighted text aloud.""" 31 | try: 32 | # Get highlighted text using xclip 33 | highlighted_process = subprocess.run( 34 | ['xclip', '-o', '-selection', 'primary'], 35 | capture_output=True, check=False, timeout=10 36 | ) 37 | if highlighted_process.returncode != 0: 38 | stderr_output = highlighted_process.stderr.decode('utf-8', errors='ignore') 39 | error_msg = "Failed to get highlighted text." 40 | if "Error: Can't open display" in stderr_output: 41 | error_msg += " (Cannot open display)" 42 | elif "Error: target STRING not available" in stderr_output: 43 | error_msg = "No text is highlighted (or not available as STRING)." 44 | else: 45 | error_msg += f" (xclip error: {stderr_output.strip()})" 46 | logger.warning(error_msg) 47 | return error_msg 48 | 49 | highlighted = highlighted_process.stdout.decode('utf-8', errors='ignore').strip() 50 | 51 | if not highlighted: 52 | message = "No text is highlighted." 53 | logger.info(message) 54 | await self._speak(message) 55 | return message 56 | 57 | # Sanitize the text to remove emojis and other non-ASCII characters 58 | # that espeak might not handle well. 59 | sanitized_text = highlighted.encode('ascii', 'ignore').decode('ascii') 60 | 61 | # Also, collapse whitespace to prevent long pauses 62 | sanitized_text = re.sub(r'\s+', ' ', sanitized_text).strip() 63 | 64 | if not sanitized_text: 65 | message = "No speakable text found after removing special characters." 66 | logger.info(message) 67 | # We won't speak this message, just return it to the CLI 68 | return message 69 | 70 | logger.info(f"Reading sanitized text (length: {len(sanitized_text)})...") 71 | await self._speak(sanitized_text) 72 | 73 | return f"Finished reading highlighted text ({len(sanitized_text)} chars)." 74 | 75 | except FileNotFoundError: 76 | error_msg = "Error: 'xclip' command not found. Cannot read highlighted text." 77 | logger.error(error_msg) 78 | return error_msg 79 | except subprocess.TimeoutExpired: 80 | error_msg = "Error: 'xclip' command timed out." 81 | logger.error(error_msg) 82 | return error_msg 83 | except Exception as e: 84 | error_msg = f"Read command failed: {str(e)}" 85 | logger.error(error_msg, exc_info=True) 86 | return error_msg 87 | 88 | async def _speak(self, text: str) -> None: 89 | """Speak text using espeak with the command's config (async and interruptible).""" 90 | if not text: 91 | return 92 | try: 93 | # Kill any existing espeak processes first 94 | subprocess.run(['pkill', '-f', 'espeak'], check=False) 95 | 96 | # Use async subprocess for proper cancellation support 97 | command = ['espeak'] + self.espeak_config 98 | logger.debug(f"Executing internal speak for text length: {len(text)}") 99 | 100 | # Create async subprocess 101 | process = await asyncio.create_subprocess_exec( 102 | *command, 103 | stdin=asyncio.subprocess.PIPE, 104 | stdout=asyncio.subprocess.DEVNULL, 105 | stderr=asyncio.subprocess.DEVNULL 106 | ) 107 | 108 | # Send text to espeak and wait for completion 109 | # This will properly respond to task cancellation 110 | await process.communicate(input=text.encode('utf-8')) 111 | 112 | except asyncio.CancelledError: 113 | logger.debug("Speech was cancelled by user interrupt.") 114 | # Kill the espeak process if it's still running 115 | subprocess.run(['pkill', '-f', 'espeak'], check=False) 116 | raise # Re-raise to propagate cancellation 117 | except FileNotFoundError: 118 | logger.error("Internal speak failed: 'espeak' command not found.") 119 | except Exception as e: 120 | logger.error(f"Unexpected internal speech error: {str(e)}") 121 | -------------------------------------------------------------------------------- /print_project.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Print all project files in a nicely formatted way and copy to clipboard""" 3 | from pathlib import Path 4 | import sys 5 | from typing import List, Set 6 | import io 7 | import subprocess 8 | from shutil import which 9 | 10 | HEADER_TEMPLATE = """ 11 | {title} 12 | {underline} 13 | """ 14 | 15 | FILE_TEMPLATE = """ 16 | File: {filename} 17 | {separator} 18 | {content} 19 | """ 20 | 21 | # Files to ignore 22 | IGNORE_PATTERNS = { 23 | '__pycache__', 24 | '.git', 25 | '.pyc', 26 | '.env', 27 | 'pyenv', 28 | '.vscode', 29 | '.idea' 30 | } 31 | 32 | def should_process(path: Path) -> bool: 33 | """Check if the path should be processed.""" 34 | return not any(ignore in str(path) for ignore in IGNORE_PATTERNS) 35 | 36 | def print_header(title: str, char: str = "=") -> str: 37 | """Print a formatted header and return it as a string.""" 38 | header = HEADER_TEMPLATE.format( 39 | title=title, 40 | underline=char * len(title) 41 | ) 42 | print(header) 43 | return header 44 | 45 | def print_file_content(file_path: Path) -> str: 46 | """Print the content of a file with nice formatting and return it as a string.""" 47 | try: 48 | content = file_path.read_text() 49 | formatted = FILE_TEMPLATE.format( 50 | filename=file_path, 51 | separator="-" * 80, 52 | content=content 53 | ) 54 | print(formatted) 55 | return formatted 56 | except Exception as e: 57 | error_msg = f"Error reading {file_path}: {e}" 58 | print(error_msg, file=sys.stderr) 59 | return error_msg 60 | 61 | def find_all_files(directory: Path) -> List[Path]: 62 | """Recursively find all files in the directory.""" 63 | files = [] 64 | try: 65 | for item in directory.iterdir(): 66 | if not should_process(item): 67 | continue 68 | 69 | if item.is_file(): 70 | files.append(item) 71 | elif item.is_dir(): 72 | files.extend(find_all_files(item)) 73 | except Exception as e: 74 | print(f"Error accessing {directory}: {e}", file=sys.stderr) 75 | 76 | return sorted(files) 77 | 78 | def print_directory_structure(directory: Path, prefix: str = "") -> str: 79 | """Print the directory structure in a tree-like format and return it as a string.""" 80 | output = [] 81 | try: 82 | items = sorted(directory.iterdir()) 83 | for i, item in enumerate(items): 84 | if not should_process(item): 85 | continue 86 | 87 | is_last = i == len(items) - 1 88 | current_prefix = "└── " if is_last else "├── " 89 | next_prefix = " " if is_last else "│ " 90 | 91 | line = f"{prefix}{current_prefix}{item.name}" 92 | print(line) 93 | output.append(line) 94 | 95 | if item.is_dir(): 96 | dir_output = print_directory_structure(item, prefix + next_prefix) 97 | output.append(dir_output) 98 | except Exception as e: 99 | error_msg = f"Error accessing {directory}: {e}" 100 | print(error_msg, file=sys.stderr) 101 | output.append(error_msg) 102 | 103 | return "\n".join(output) 104 | 105 | def copy_to_clipboard(text: str) -> bool: 106 | """Copy text to clipboard based on the platform.""" 107 | try: 108 | # macOS 109 | if which('pbcopy'): 110 | subprocess.run(['pbcopy'], input=text.encode('utf-8'), check=True) 111 | return True 112 | # Linux with xclip 113 | elif which('xclip'): 114 | subprocess.run(['xclip', '-selection', 'clipboard'], input=text.encode('utf-8'), check=True) 115 | return True 116 | # Linux with wl-copy (Wayland) 117 | elif which('wl-copy'): 118 | subprocess.run(['wl-copy'], input=text.encode('utf-8'), check=True) 119 | return True 120 | # Windows 121 | elif sys.platform == 'win32': 122 | import pyperclip 123 | pyperclip.copy(text) 124 | return True 125 | else: 126 | print("Could not find a suitable clipboard tool. Consider installing pyperclip.", file=sys.stderr) 127 | return False 128 | except Exception as e: 129 | print(f"Failed to copy to clipboard: {e}", file=sys.stderr) 130 | return False 131 | 132 | def main(): 133 | """Main function to print project files and copy to clipboard.""" 134 | # Use StringIO to capture all output 135 | output_buffer = io.StringIO() 136 | original_stdout = sys.stdout 137 | sys.stdout = output_buffer 138 | 139 | project_root = Path(__file__).parent 140 | 141 | print_header("Project Structure") 142 | print(f"Root: {project_root}") 143 | print_directory_structure(project_root) 144 | print() 145 | 146 | print_header("Project Files") 147 | for file_path in find_all_files(project_root): 148 | if file_path.suffix in ['.py', '.txt', '.md', '.json', '.yaml', '.yml']: 149 | print_file_content(file_path) 150 | 151 | # Restore stdout and get the captured output 152 | sys.stdout = original_stdout 153 | full_output = output_buffer.getvalue() 154 | 155 | # Print the output to the terminal 156 | print(full_output) 157 | 158 | # Copy to clipboard 159 | if copy_to_clipboard(full_output): 160 | print("\nProject structure and files have been copied to clipboard!") 161 | else: 162 | print("\nFailed to copy to clipboard. You may need to install a clipboard package.") 163 | print("For Python, you can use: pip install pyperclip") 164 | 165 | if __name__ == "__main__": 166 | try: 167 | main() 168 | except KeyboardInterrupt: 169 | print("\nPrinting interrupted.", file=sys.stderr) 170 | sys.exit(1) 171 | except Exception as e: 172 | print(f"Error: {e}", file=sys.stderr) 173 | sys.exit(1) 174 | -------------------------------------------------------------------------------- /commands/command_processor.py: -------------------------------------------------------------------------------- 1 | # commands/command_processor.py 2 | import importlib 3 | import inspect 4 | import logging 5 | import pkgutil 6 | from pathlib import Path 7 | from typing import AsyncGenerator, Optional, Tuple, Dict, List 8 | from difflib import SequenceMatcher 9 | 10 | from .base import Command 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | class CommandProcessor: 15 | def __init__(self): 16 | """Initialize the command processor by dynamically discovering commands.""" 17 | self.commands: Dict[str, Command] = {} 18 | self.all_triggers_cache: List[str] = [] 19 | self._discover_commands() 20 | logger.info(f"Command processor dynamically loaded commands: {list(self.commands.keys())}") 21 | 22 | def _discover_commands(self): 23 | """Dynamically finds and registers command classes.""" 24 | commands_package_path = Path(__file__).parent 25 | logger.debug(f"Discovering commands in: {commands_package_path}") 26 | for (_, module_name, _) in pkgutil.iter_modules([str(commands_package_path)]): 27 | if module_name in ['__init__', 'base']: continue 28 | full_module_name = f"commands.{module_name}" 29 | logger.debug(f"Attempting to import module: {full_module_name}") 30 | try: 31 | module = importlib.import_module(full_module_name) 32 | for _, obj in inspect.getmembers(module, inspect.isclass): 33 | if obj.__module__ == full_module_name and issubclass(obj, Command) and obj is not Command: 34 | try: 35 | command_instance = obj() 36 | if command_instance.name in self.commands: logger.warning(f"Duplicate command name '{command_instance.name}'. Overwriting.") 37 | self.commands[command_instance.name] = command_instance 38 | except Exception as inst_e: logger.error(f"Failed to instantiate command {obj.__name__} from {full_module_name}: {inst_e}", exc_info=True) 39 | except Exception as import_e: logger.error(f"Failed to import/process module {full_module_name}: {import_e}", exc_info=True) 40 | self._cache_all_triggers() 41 | 42 | def _cache_all_triggers(self): 43 | """Builds and caches a sorted list of all command triggers.""" 44 | triggers = set(self.commands.keys()) 45 | for cmd in self.commands.values(): 46 | triggers.update(cmd.aliases) 47 | self.all_triggers_cache = sorted(list(triggers), key=len, reverse=True) 48 | 49 | def get_command_details(self) -> List[Tuple[str, List[str], str]]: 50 | """Returns details for all registered commands for the help text.""" 51 | details = [(cmd.name, cmd.aliases, getattr(cmd, 'description', 'No description.')) for cmd in sorted(self.commands.values(), key=lambda c: c.name)] 52 | return details 53 | 54 | def _get_command_name_for_trigger(self, trigger: str) -> Optional[str]: 55 | """Helper to find the main command name from a trigger (which could be an alias).""" 56 | if trigger in self.commands: 57 | return trigger 58 | for name, command_obj in self.commands.items(): 59 | if trigger in command_obj.aliases: 60 | return name 61 | return None 62 | 63 | def parse_command(self, text: str) -> Tuple[Optional[str], Optional[str]]: 64 | """ 65 | Parses the command from the input text with robust prefix matching 66 | and fuzzy matching for single-word commands. 67 | """ 68 | text_orig = text.strip() 69 | text_lower = text_orig.lower() 70 | 71 | # First, try robust prefix matching for all triggers. 72 | # This is better for commands that can take arguments. 73 | for trigger in self.all_triggers_cache: 74 | if text_lower.startswith(trigger): 75 | # If it's an exact match 76 | if len(text_lower) == len(trigger): 77 | command_name = self._get_command_name_for_trigger(trigger) 78 | return command_name, "" 79 | 80 | # If it's a prefix match (command with arguments) 81 | # Skip this for single-word-only commands 82 | char_after_trigger = text_lower[len(trigger)] 83 | if char_after_trigger in ' ,.!?': 84 | command_name = self._get_command_name_for_trigger(trigger) 85 | if command_name: 86 | # Check if this command is single-word-only 87 | command = self.commands.get(command_name) 88 | if command and getattr(command, 'single_word_only', False): 89 | # Skip prefix matching for single-word-only commands 90 | continue 91 | args = text_orig[len(trigger):].lstrip(' ,.!?') 92 | return command_name, args 93 | 94 | # If no prefix match, try fuzzy matching for single-word commands. 95 | # This helps with misspellings from voice-to-text. 96 | words = text_lower.split() 97 | if len(words) == 1: 98 | text_norm = words[0].rstrip('.,!?') 99 | # Find the best match among all triggers 100 | best_match_trigger = None 101 | highest_similarity = 0.85 # Minimum similarity threshold 102 | 103 | for trigger in self.all_triggers_cache: 104 | # Only fuzzy match against triggers that don't expect arguments usually 105 | # This is a heuristic: match against single-word triggers 106 | if " " not in trigger: 107 | similarity = SequenceMatcher(None, text_norm, trigger).ratio() 108 | if similarity > highest_similarity: 109 | highest_similarity = similarity 110 | best_match_trigger = trigger 111 | 112 | if best_match_trigger: 113 | command_name = self._get_command_name_for_trigger(best_match_trigger) 114 | logger.debug(f"Fuzzy matched '{text_norm}' to '{best_match_trigger}' with similarity {highest_similarity:.2f}") 115 | return command_name, "" 116 | 117 | return None, None 118 | 119 | async def process_command(self, text: str) -> AsyncGenerator[str, None]: 120 | """Process a command string and yield status messages.""" 121 | command_name, args = self.parse_command(text) 122 | if not command_name: 123 | logger.warning(f"process_command called with unparseable text: {text}") 124 | yield f"Unknown command or query format: {text}" 125 | return 126 | 127 | command = self.commands.get(command_name) 128 | if not command: 129 | yield f"Internal error: Command '{command_name}' parsed but not found." 130 | return 131 | 132 | try: 133 | execute_method = command.execute 134 | if inspect.isasyncgenfunction(execute_method): 135 | async for result_part in execute_method(args): 136 | yield result_part 137 | elif inspect.iscoroutinefunction(execute_method): 138 | result_message = await execute_method(args) 139 | if result_message: 140 | yield result_message 141 | else: 142 | logger.warning(f"Command '{command_name}' execute method is synchronous.") 143 | result_message = execute_method(args) 144 | if result_message: 145 | yield result_message 146 | except Exception as e: 147 | error_msg = f"Command '{command_name}' execution failed: {str(e)}" 148 | logger.error(error_msg, exc_info=True) 149 | yield error_msg 150 | -------------------------------------------------------------------------------- /hotkey_listener.py: -------------------------------------------------------------------------------- 1 | # hotkey_listener.py 2 | import threading 3 | import asyncio 4 | import logging 5 | import subprocess 6 | import time 7 | from pynput import keyboard 8 | from typing import Callable, Optional, Any 9 | 10 | # --- Globals for Hotkey State --- 11 | ctrl_pressed = False 12 | shift_pressed = False 13 | recording_key_pressed = False 14 | ctrl_c_combo_pressed = False 15 | 16 | logger = logging.getLogger(__name__) 17 | logging.getLogger("pynput").setLevel(logging.WARNING) 18 | 19 | # --- References (set during initialization) --- 20 | voice_system_ref = None 21 | print_scheduler_ref = None 22 | main_loop_ref = None 23 | current_task_accessor = None 24 | 25 | def _interrupt_current_action(): 26 | """Cancels the current command task and stops speech.""" 27 | if not current_task_accessor or not print_scheduler_ref or not main_loop_ref: 28 | logger.warning("Cannot interrupt: Missing references.") 29 | return 30 | 31 | task_cancelled = False 32 | try: 33 | current_task = current_task_accessor() 34 | if current_task and not current_task.done(): 35 | logger.debug("Interrupt requested: Cancelling current command task.") 36 | current_task.cancel() 37 | task_cancelled = True 38 | else: 39 | logger.debug("Interrupt requested: No active/cancellable command task found.") 40 | except Exception as e: 41 | logger.error(f"Error accessing/cancelling current task: {e}") 42 | 43 | speech_stopped = False 44 | try: 45 | logger.debug("Interrupt requested: Stopping any active espeak process via pkill.") 46 | result = subprocess.run(['pkill', '-f', 'espeak'], capture_output=True, check=False, timeout=1) 47 | if result.returncode == 0: 48 | logger.debug("pkill stopped espeak process(es).") 49 | speech_stopped = True 50 | elif result.returncode == 1: 51 | logger.debug("pkill found no espeak process to stop.") 52 | else: 53 | stderr_msg = result.stderr.decode(errors='ignore').strip() 54 | logger.warning(f"pkill command failed for espeak (code {result.returncode}): {stderr_msg}") 55 | except FileNotFoundError: 56 | logger.error("Cannot stop speech: 'pkill' command not found.") 57 | except subprocess.TimeoutExpired: 58 | logger.warning("pkill command timed out while trying to stop espeak.") 59 | except Exception as e: 60 | logger.error(f"Error running pkill for espeak: {e}") 61 | 62 | if task_cancelled or speech_stopped: 63 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Interrupted by user (Ctrl+C).") 64 | 65 | def on_press(key): 66 | """Handles key press events for hotkeys.""" 67 | global ctrl_pressed, shift_pressed, recording_key_pressed, ctrl_c_combo_pressed 68 | if not all([voice_system_ref, print_scheduler_ref, main_loop_ref, current_task_accessor]): 69 | return 70 | 71 | try: 72 | is_ctrl = key in (keyboard.Key.ctrl_l, keyboard.Key.ctrl_r) 73 | is_shift = key in (keyboard.Key.shift, keyboard.Key.shift_r) 74 | is_c_key = hasattr(key, 'char') and key.char == 'c' 75 | 76 | if is_ctrl: 77 | ctrl_pressed = True 78 | elif is_shift: 79 | shift_pressed = True 80 | 81 | # --- Recording Hotkey Logic (Ctrl + Shift) --- 82 | if ctrl_pressed and shift_pressed and not recording_key_pressed: 83 | logger.debug("Ctrl+Shift pressed, scheduling recording start.") 84 | recording_key_pressed = True 85 | 86 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Recording started...") 87 | main_loop_ref.call_soon_threadsafe( 88 | lambda: main_loop_ref.run_in_executor(None, voice_system_ref.start_quick_record) 89 | ) 90 | return 91 | 92 | # --- Interruption Hotkey Logic (Ctrl+C) --- 93 | if ctrl_pressed and is_c_key and not ctrl_c_combo_pressed: 94 | logger.debug("Ctrl+C pressed, scheduling interruption.") 95 | ctrl_c_combo_pressed = True 96 | main_loop_ref.call_soon_threadsafe(_interrupt_current_action) 97 | return 98 | 99 | except Exception as e: 100 | logger.error(f"Error in hotkey on_press callback: {e}", exc_info=True) 101 | if print_scheduler_ref and main_loop_ref: 102 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", f"Hotkey press error: {e}") 103 | 104 | def on_release(key): 105 | """Handles key release events for hotkeys.""" 106 | global ctrl_pressed, shift_pressed, recording_key_pressed, ctrl_c_combo_pressed 107 | if not all([voice_system_ref, print_scheduler_ref, main_loop_ref]): 108 | return 109 | 110 | try: 111 | is_ctrl = key in (keyboard.Key.ctrl_l, keyboard.Key.ctrl_r) 112 | is_shift = key in (keyboard.Key.shift, keyboard.Key.shift_r) 113 | is_c_key = hasattr(key, 'char') and key.char == 'c' 114 | 115 | # --- CORRECTED RELEASE LOGIC --- 116 | # First, update the state based on which key was released. 117 | if is_ctrl: 118 | ctrl_pressed = False 119 | elif is_shift: 120 | shift_pressed = False 121 | 122 | # Now, check if we should stop recording. 123 | # This only triggers if we WERE recording AND NEITHER Ctrl NOR Shift is still pressed. 124 | if recording_key_pressed and not ctrl_pressed and not shift_pressed: 125 | logger.debug("Ctrl+Shift combo fully released, scheduling recording stop.") 126 | recording_key_pressed = False # Reset state immediately 127 | 128 | # This small delay gives slow applications time to process the key-up event 129 | time.sleep(0.1) 130 | 131 | # Schedule actions on the main loop 132 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "System", "Recording stopped. Processing...") 133 | main_loop_ref.call_soon_threadsafe( 134 | lambda: main_loop_ref.run_in_executor(None, voice_system_ref.stop_quick_record) 135 | ) 136 | # --- END CORRECTED LOGIC --- 137 | 138 | # --- Interruption Hotkey Release Logic --- 139 | if (is_ctrl or is_c_key) and ctrl_c_combo_pressed: 140 | logger.debug(f"Ctrl+C combo key released ({key}). Resetting combo flag.") 141 | ctrl_c_combo_pressed = False 142 | 143 | except Exception as e: 144 | logger.error(f"Error in hotkey on_release callback: {e}", exc_info=True) 145 | if print_scheduler_ref and main_loop_ref: 146 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", f"Hotkey release error: {e}") 147 | 148 | def start_listener(loop: asyncio.AbstractEventLoop, 149 | voice_system_instance: Any, 150 | print_scheduler: Callable, 151 | task_accessor_func: Callable[[], Optional[asyncio.Task]]) -> Optional[threading.Thread]: 152 | """Initializes and starts the global hotkey listener.""" 153 | global voice_system_ref, print_scheduler_ref, main_loop_ref, current_task_accessor 154 | voice_system_ref = voice_system_instance 155 | print_scheduler_ref = print_scheduler 156 | main_loop_ref = loop 157 | current_task_accessor = task_accessor_func 158 | 159 | logger.info("Starting global hotkey listener thread (Ctrl+Shift for record, Ctrl+C for interrupt)...") 160 | try: 161 | listener = keyboard.Listener(on_press=on_press, on_release=on_release) 162 | listener_thread = threading.Thread( 163 | target=listener.run, 164 | daemon=True, 165 | name="HotkeyListenerThread" 166 | ) 167 | listener_thread.start() 168 | logger.info("Hotkey listener thread started successfully.") 169 | return listener_thread 170 | except Exception as e: 171 | logger.error(f"Failed to start pynput hotkey listener: {e}", exc_info=True) 172 | if print_scheduler_ref and main_loop_ref: 173 | main_loop_ref.call_soon_threadsafe(print_scheduler_ref, "Error", "CRITICAL: Failed to start global hotkey listener!") 174 | else: 175 | print("[CRITICAL ERROR] Failed to start global hotkey listener!") 176 | return None 177 | -------------------------------------------------------------------------------- /speech/whisper_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Optional 3 | import nemo.collections.asr as nemo_asr # Import NeMo 4 | import logging 5 | import warnings 6 | import numpy as np 7 | import soundfile as sf # For writing temporary audio files 8 | import tempfile # For creating temporary files 9 | import os # For file operations like remove 10 | 11 | # Configure logging 12 | # Use a more specific logger name if desired, e.g., logging.getLogger("ParakeetASR") 13 | logger = logging.getLogger(__name__) # Using __name__ is a common practice 14 | # Set level for NeMo's logger 15 | logging.getLogger("nemo_toolkit").setLevel(logging.ERROR) 16 | warnings.filterwarnings("ignore", category=UserWarning, module='pytorch_lightning.*') # More specific warning ignore 17 | warnings.filterwarnings("ignore", category=FutureWarning) 18 | 19 | 20 | class ParakeetProcessor: 21 | def __init__(self): 22 | """Initialize the Parakeet ASR processor.""" 23 | logger.info("Initializing Parakeet ASR processor...") 24 | self.asr_model = None # Initialize as None 25 | self.device = "cuda:0" if torch.cuda.is_available() else "cpu" 26 | self.setup_model() 27 | 28 | def setup_model(self): 29 | """Initialize the Parakeet model.""" 30 | logger.info(f"Setting up Parakeet ASR model on device: {self.device}") 31 | try: 32 | model_id = "nvidia/parakeet-tdt-0.6b-v2" 33 | # NeMo will handle downloading the model if it's not cached locally. 34 | # Ensure you have an internet connection the first time this runs. 35 | # The model requires at least 2GB RAM as per its model card. 36 | self.asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_id) 37 | self.asr_model.to(self.device) 38 | self.asr_model.eval() # Set the model to evaluation mode 39 | 40 | logger.info(f"Parakeet ASR model ({model_id}) initialized and moved to {self.device}.") 41 | 42 | except Exception as e: 43 | logger.error(f"Error initializing Parakeet ASR model: {e}", exc_info=True) 44 | # Depending on desired behavior, you might re-raise or handle this 45 | # such that the application can continue without ASR or exit gracefully. 46 | raise # Re-raise to make the calling code aware of the failure 47 | 48 | def _preprocess_audio(self, audio_data_np: np.ndarray, expected_sample_rate: int = 16000) -> np.ndarray: 49 | """ 50 | Prepares audio data for the Parakeet ASR model. 51 | Ensures audio is a 1D float32 NumPy array at the expected sample rate. 52 | The Parakeet model card specifies 16kHz mono channel audio. 53 | """ 54 | if audio_data_np is None or audio_data_np.size == 0: 55 | logger.warning("Preprocessing received empty audio data.") 56 | return np.array([], dtype=np.float32) 57 | 58 | # Ensure it's a NumPy array 59 | if not isinstance(audio_data_np, np.ndarray): 60 | logger.warning("Audio data is not a NumPy array. Attempting conversion.") 61 | try: 62 | audio_data_np = np.array(audio_data_np) 63 | except Exception as e: 64 | logger.error(f"Failed to convert audio data to NumPy array: {e}", exc_info=True) 65 | return np.array([], dtype=np.float32) 66 | 67 | # Ensure it's 1D (mono) 68 | if audio_data_np.ndim > 1: 69 | logger.warning(f"Audio data has {audio_data_np.ndim} dimensions. Converting to mono by taking the mean or first channel.") 70 | # Example: take the mean across channels if stereo, or adapt as needed 71 | if audio_data_np.shape[0] < audio_data_np.shape[1]: # (channels, samples) 72 | audio_data_np = np.mean(audio_data_np, axis=0) 73 | else: # (samples, channels) 74 | audio_data_np = np.mean(audio_data_np, axis=1) 75 | 76 | 77 | # Convert to float32 if not already 78 | if audio_data_np.dtype != np.float32: 79 | if np.issubdtype(audio_data_np.dtype, np.integer): 80 | # Normalize integer types to [-1, 1] before converting to float32 81 | # Common for int16 from PyAudio 82 | max_val = np.iinfo(audio_data_np.dtype).max 83 | audio_data_np = audio_data_np.astype(np.float32) / max_val 84 | else: 85 | # For other float types, just convert 86 | audio_data_np = audio_data_np.astype(np.float32) 87 | 88 | # Basic normalization: ensure values are roughly within [-1, 1] 89 | # This step might be redundant if your input audio is already well-normalized. 90 | # NeMo models are generally robust, but good practice. 91 | abs_max = np.abs(audio_data_np).max() 92 | if abs_max > 1.0: 93 | logger.debug(f"Audio data max absolute value {abs_max} > 1.0. Normalizing.") 94 | audio_data_np /= abs_max 95 | elif abs_max == 0: # Avoid division by zero for pure silence 96 | logger.debug("Audio data is pure silence.") 97 | # audio_data_np remains all zeros 98 | 99 | logger.debug(f"Preprocessed audio for Parakeet - Shape: {audio_data_np.shape}, Type: {audio_data_np.dtype}, Range: [{audio_data_np.min():.3f}, {audio_data_np.max():.3f}]") 100 | return audio_data_np 101 | 102 | async def transcribe(self, audio_data: np.ndarray, sample_rate: int = 16000) -> Optional[str]: 103 | """ 104 | Transcribes a NumPy array of audio data. 105 | Audio data should be 16kHz mono. 106 | """ 107 | if self.asr_model is None: 108 | logger.error("ASR model not initialized. Cannot transcribe.") 109 | return None 110 | 111 | if audio_data is None or audio_data.size == 0: 112 | logger.info("Received empty audio data for transcription.") 113 | return None 114 | 115 | # Preprocess audio (ensure it's a NumPy array at 16kHz, float32, mono) 116 | # Your VoiceCommandSystem provides audio_data as a NumPy array and uses 16kHz. 117 | audio_processed_np = self._preprocess_audio(audio_data, expected_sample_rate=sample_rate) 118 | 119 | if audio_processed_np.size == 0: 120 | logger.warning("Audio processing resulted in empty data. Skipping transcription.") 121 | return None 122 | 123 | temp_file_path = None # Define here for broader scope in finally block 124 | try: 125 | # NeMo's transcribe method primarily takes a list of audio file paths. 126 | # Saving the processed NumPy array to a temporary WAV file is a robust way. 127 | with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio_file: 128 | sf.write(tmp_audio_file.name, audio_processed_np, sample_rate) 129 | temp_file_path = tmp_audio_file.name 130 | 131 | logger.debug(f"Transcribing temporary audio file: {temp_file_path}") 132 | 133 | # Transcribe using the NeMo model. 134 | # The `transcribe` method returns a list of transcriptions. 135 | # For a single audio file, it's a list containing one transcription string. 136 | # If `return_hypotheses` is True, the structure is more complex. 137 | # Based on Parakeet model card: `output = asr_model.transcribe(['audio.wav'])` 138 | # `output[0].text` or `output[0]` (if `return_hypotheses=False` which is default). 139 | # Let's assume the simpler case for now. 140 | transcription_results = self.asr_model.transcribe([temp_file_path]) 141 | 142 | transcribed_text = None 143 | if transcription_results and isinstance(transcription_results, list) and len(transcription_results) > 0: 144 | # The result for a single file is typically a list containing one string (the transcription). 145 | # Or if return_hypotheses=True (default for some models), it's a list of Hypothesis objects. 146 | # Let's check the type of the first element. 147 | first_result = transcription_results[0] 148 | if isinstance(first_result, str): 149 | transcribed_text = first_result 150 | elif hasattr(first_result, 'text'): # Handles Hypothesis object 151 | transcribed_text = first_result.text 152 | else: 153 | # If the result structure is different (e.g., nested lists for batched input) 154 | # you might need to adjust. For a single file, it's usually simple. 155 | # For `parakeet-tdt-0.6b-v2`, `transcribe()` returns List[str] by default. 156 | logger.warning(f"Unexpected transcription result format: {type(first_result)}. Full result: {transcription_results}") 157 | transcribed_text = str(first_result) # Fallback to string conversion 158 | 159 | # Parakeet includes punctuation and capitalization. 160 | logger.info(f"Transcribed by Parakeet: '{transcribed_text}'") 161 | 162 | else: 163 | logger.info("Parakeet transcription returned no result or an empty result.") 164 | 165 | return transcribed_text 166 | 167 | except Exception as e: 168 | logger.error(f"Error during Parakeet transcription: {e}", exc_info=True) 169 | return None 170 | finally: 171 | # Clean up the temporary file in all cases (success or error) 172 | if temp_file_path and os.path.exists(temp_file_path): 173 | try: 174 | os.remove(temp_file_path) 175 | logger.debug(f"Temporary audio file {temp_file_path} removed.") 176 | except Exception as cleanup_e: 177 | logger.error(f"Error cleaning up temporary audio file {temp_file_path}: {cleanup_e}", exc_info=True) 178 | -------------------------------------------------------------------------------- /commands/scrap_command.py: -------------------------------------------------------------------------------- 1 | # commands/scrap_command.py 2 | import subprocess 3 | import tempfile 4 | import os 5 | import logging 6 | import shutil 7 | from datetime import datetime 8 | from pathlib import Path 9 | 10 | try: 11 | import pytesseract 12 | from PIL import Image 13 | PIL_AVAILABLE = True 14 | except ImportError: 15 | PIL_AVAILABLE = False 16 | pytesseract = None 17 | Image = None 18 | 19 | from .base import Command 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | def is_tool(name): 24 | """Check whether `name` is on PATH and marked as executable.""" 25 | return shutil.which(name) is not None 26 | 27 | class ScrapCommand(Command): 28 | def __init__(self): 29 | super().__init__( 30 | name="scrap", 31 | aliases=["screengrab", "screen grab", "ocrgrab", "grabtext"], 32 | description="Select screen area, OCR text, copy to clipboard, and save the image.", 33 | execute=self._execute 34 | ) 35 | self.check_dependencies() 36 | self.pictures_dir = Path.home() / "Pictures" / "scraps" 37 | os.makedirs(self.pictures_dir, exist_ok=True) 38 | 39 | def check_dependencies(self): 40 | """Checks for required system tools and libraries.""" 41 | if not PIL_AVAILABLE: 42 | logger.error("Pillow or Pytesseract not installed. Scrap command disabled.") 43 | return 44 | 45 | self.screenshot_tool = None 46 | if is_tool("gnome-screenshot"): 47 | self.screenshot_tool = "gnome-screenshot" 48 | logger.info("Using 'gnome-screenshot' for screen capture.") 49 | elif is_tool("maim"): 50 | self.screenshot_tool = "maim" 51 | logger.info("Using 'maim' for screen capture. Ensure 'slop' is installed for selection if needed.") 52 | elif is_tool("scrot"): 53 | self.screenshot_tool = "scrot" 54 | logger.info("Using 'scrot' for screen capture.") 55 | else: 56 | logger.error("No suitable screenshot tool found (tried gnome-screenshot, maim, scrot). Scrap command disabled.") 57 | 58 | if not is_tool("xclip"): 59 | logger.error("'xclip' not found. Scrap command cannot copy to clipboard/primary.") 60 | self.screenshot_tool = None 61 | 62 | async def _execute(self, args: str) -> str: 63 | """ 64 | Selects a screen area, performs OCR, copies text, and saves the image. 65 | Args are ignored. 66 | """ 67 | if not PIL_AVAILABLE: 68 | return "Error: Pillow or Pytesseract library not installed." 69 | if not self.screenshot_tool: 70 | return "Error: No suitable screenshot tool (gnome-screenshot/maim/scrot) or xclip found." 71 | if not is_tool("xclip"): 72 | return "Error: xclip command not found." 73 | 74 | logger.info(f"Starting scrap using {self.screenshot_tool}...") 75 | 76 | try: 77 | with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img_file: 78 | temp_filename = temp_img_file.name 79 | logger.debug(f"Temporary screenshot file: {temp_filename}") 80 | 81 | screenshot_success = False 82 | cmd = [] 83 | if self.screenshot_tool == "gnome-screenshot": 84 | cmd = ['gnome-screenshot', '-a', '-f', temp_filename] 85 | elif self.screenshot_tool == "maim": 86 | cmd = ['maim', '-s', temp_filename] 87 | elif self.screenshot_tool == "scrot": 88 | cmd = ['scrot', '-s', temp_filename] 89 | 90 | if not cmd: 91 | return "Error: Could not determine screenshot command." 92 | 93 | logger.debug(f"Running command: {' '.join(cmd)}") 94 | try: 95 | result = subprocess.run(cmd, check=True, capture_output=True, timeout=60) 96 | if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 0: 97 | screenshot_success = True 98 | else: 99 | logger.warning(f"{self.screenshot_tool} exited ok, but temp file is missing or empty.") 100 | stderr_output = result.stderr.decode('utf-8', errors='ignore').lower() 101 | if "cancel" in stderr_output or "giblib error" in stderr_output: 102 | screenshot_success = False 103 | else: 104 | screenshot_success = True 105 | logger.warning("Assuming screenshot success despite possible file issue.") 106 | 107 | except FileNotFoundError: 108 | logger.error(f"Screenshot tool '{self.screenshot_tool}' not found during execution.") 109 | os.remove(temp_filename) 110 | return f"Error: Screenshot tool '{self.screenshot_tool}' failed (not found)." 111 | except subprocess.CalledProcessError as e: 112 | logger.info(f"{self.screenshot_tool} exited with error (likely cancelled): {e}") 113 | stderr_output = e.stderr.decode('utf-8', errors='ignore').lower() 114 | if "cancel" in stderr_output or "giblib error" in stderr_output: 115 | screenshot_success = False 116 | else: 117 | logger.error(f"Screenshot command failed: {e.stderr.decode('utf-8', errors='ignore')}") 118 | screenshot_success = False 119 | except subprocess.TimeoutExpired: 120 | logger.error("Screenshot command timed out.") 121 | screenshot_success = False 122 | except Exception as e: 123 | logger.error(f"Unexpected error during screenshot: {e}", exc_info=True) 124 | screenshot_success = False 125 | 126 | if screenshot_success: 127 | # Save a copy of the screenshot 128 | timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 129 | save_path = self.pictures_dir / f"scrap_{timestamp}.png" 130 | shutil.copy(temp_filename, save_path) 131 | logger.info(f"Screenshot saved to {save_path}") 132 | 133 | logger.debug("Screenshot captured successfully. Performing OCR...") 134 | try: 135 | extracted_text = pytesseract.image_to_string(Image.open(temp_filename)).strip() 136 | 137 | if not extracted_text: 138 | logger.info("OCR completed, but no text was found.") 139 | os.remove(temp_filename) 140 | return "Scrap complete. No text found in the selected area." 141 | 142 | logger.info(f"OCR successful. Text length: {len(extracted_text)}") 143 | logger.debug(f"Extracted text (first 100 chars): {extracted_text[:100]}") 144 | 145 | try: 146 | subprocess.run( 147 | ['xclip', '-selection', 'clipboard'], 148 | input=extracted_text.encode('utf-8'), 149 | check=True, 150 | timeout=5 151 | ) 152 | subprocess.run( 153 | ['xclip', '-selection', 'primary'], 154 | input=extracted_text.encode('utf-8'), 155 | check=True, 156 | timeout=5 157 | ) 158 | logger.info("Text copied to clipboard and primary selection.") 159 | os.remove(temp_filename) 160 | return f"Scrap successful. Copied {len(extracted_text)} characters and saved image." 161 | 162 | except FileNotFoundError: 163 | logger.error("xclip not found during copy step.") 164 | os.remove(temp_filename) 165 | return "Error: xclip not found. Could not copy OCR text." 166 | except subprocess.CalledProcessError as e: 167 | logger.error(f"xclip command failed: {e.stderr.decode('utf-8', errors='ignore')}") 168 | os.remove(temp_filename) 169 | return f"Error copying text with xclip: {e.stderr.decode('utf-8', errors='ignore')}" 170 | except subprocess.TimeoutExpired: 171 | logger.error("xclip command timed out.") 172 | os.remove(temp_filename) 173 | return "Error: Timeout copying text with xclip." 174 | except Exception as e: 175 | logger.error(f"Unexpected error during xclip copy: {e}", exc_info=True) 176 | os.remove(temp_filename) 177 | return f"Error during text copy: {e}" 178 | 179 | except pytesseract.TesseractNotFoundError: 180 | logger.error("pytesseract error: 'tesseract' command not found.") 181 | os.remove(temp_filename) 182 | return "Error: Tesseract OCR engine not found. Please install tesseract-ocr." 183 | except Exception as ocr_err: 184 | logger.error(f"Error during OCR processing: {ocr_err}", exc_info=True) 185 | os.remove(temp_filename) 186 | return f"Error during OCR: {ocr_err}" 187 | else: 188 | if os.path.exists(temp_filename): 189 | os.remove(temp_filename) 190 | logger.info("Scrap cancelled or failed before OCR.") 191 | return "Scrap cancelled or failed." 192 | 193 | except Exception as outer_e: 194 | error_msg = f"Unexpected error during scrap: {str(outer_e)}" 195 | logger.error(error_msg, exc_info=True) 196 | if 'temp_filename' in locals() and os.path.exists(temp_filename): 197 | try: os.remove(temp_filename) 198 | except Exception as cleanup_e: logger.error(f"Failed to clean up temp file {temp_filename}: {cleanup_e}") 199 | return error_msg 200 | -------------------------------------------------------------------------------- /core/voice_system.py: -------------------------------------------------------------------------------- 1 | # core/voice_system.py 2 | import numpy as np 3 | import warnings 4 | import logging 5 | import pyaudio 6 | import queue 7 | import asyncio 8 | import threading 9 | import psutil 10 | import time 11 | from typing import Optional, Callable, Awaitable, Any, Coroutine 12 | 13 | from webrtcvad import Vad 14 | from speech.whisper_processor import ParakeetProcessor 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | def is_espeak_running(): 19 | """Check if espeak is currently running.""" 20 | for proc in psutil.process_iter(['name', 'cmdline']): 21 | try: 22 | if proc.info['name'] == 'espeak' or \ 23 | (proc.info['cmdline'] and 'espeak' in proc.info['cmdline'][0]): 24 | return True 25 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 26 | pass 27 | return False 28 | 29 | 30 | class VoiceCommandSystem: 31 | def __init__(self, 32 | loop: asyncio.AbstractEventLoop, 33 | speak_func: Callable[[str], Awaitable[None]], 34 | command_trigger_func: Callable[[str], Coroutine[Any, Any, None]]): 35 | logger.info("Initializing voice command system...") 36 | self.loop = loop 37 | self.speak_func = speak_func 38 | self.command_trigger_func = command_trigger_func 39 | self.transcript_callback: Optional[Callable[[str, str], None]] = None 40 | self.input_device_index: Optional[int] = None 41 | 42 | self.asr_processor: Optional[ParakeetProcessor] = None 43 | self.p: Optional[pyaudio.PyAudio] = None 44 | self.vad: Optional[Vad] = None 45 | 46 | # --- REFACTORED FOR THREAD-SAFE AUDIO HANDLING --- 47 | self.background_stream: Optional[pyaudio.Stream] = None 48 | self.quick_record_stream: Optional[pyaudio.Stream] = None 49 | 50 | self.background_queue: queue.Queue = queue.Queue() 51 | self.quick_record_queue: queue.Queue = queue.Queue() 52 | 53 | self.background_worker_thread: Optional[threading.Thread] = None 54 | self.quick_record_worker_thread: Optional[threading.Thread] = None 55 | 56 | self.is_background_listening = threading.Event() 57 | self.is_quick_recording = threading.Event() 58 | 59 | self.current_audio: list[np.ndarray] = [] 60 | self.prev_frames: list[np.ndarray] = [] 61 | # --- END REFACTOR --- 62 | 63 | # Audio parameters 64 | self.sample_rate: int = 16000 65 | self.channels: int = 1 66 | self.format: int = pyaudio.paInt16 67 | self.frame_duration: int = 30 68 | self.frame_length: int = 0 69 | self.chunk_size: int = 0 70 | 71 | self.prev_frames_duration: float = 0.5 72 | self.prev_frames_maxlen: int = 0 73 | 74 | self.silence_limit: float = 0.7 75 | self.min_silence_detections: int = 0 76 | 77 | try: 78 | self.setup_system() 79 | self.start_background_listener() 80 | except Exception as e: 81 | logger.error(f"Critical error during VoiceCommandSystem setup: {e}", exc_info=True) 82 | self.cleanup() 83 | raise 84 | 85 | logger.info("Voice system initialization complete.") 86 | 87 | def setup_system(self): 88 | logger.debug("Setting up system components...") 89 | self.p = pyaudio.PyAudio() 90 | self._detect_audio_devices() 91 | self.asr_processor = ParakeetProcessor() 92 | self.setup_vad() 93 | self.chunk_size = self.frame_length 94 | logger.debug("System components setup finished.") 95 | 96 | def _detect_audio_devices(self): 97 | # This extensive device detection logic is good. No changes needed here. 98 | logger.info("Detecting audio input devices...") 99 | if not self.p: 100 | logger.error("PyAudio not initialized before _detect_audio_devices.") 101 | self.p = pyaudio.PyAudio() 102 | 103 | self.input_devices = [] 104 | default_system_device_index: Optional[int] = None 105 | host_api_info = None 106 | default_host_api_index = 0 107 | 108 | try: 109 | default_host_api_info_dict = self.p.get_default_host_api_info() 110 | default_host_api_index = default_host_api_info_dict['index'] 111 | host_api_info = self.p.get_host_api_info_by_index(default_host_api_index) 112 | default_system_device_index = host_api_info.get('defaultInputDevice') 113 | if default_system_device_index == -1: 114 | default_system_device_index = None 115 | logger.info(f"Default Host API: {host_api_info.get('name')}, Default System Input Device Index: {default_system_device_index}") 116 | except Exception as e: 117 | logger.warning(f"Could not get default device via Host API info: {e}. Will iterate.") 118 | try: 119 | default_input_device_info = self.p.get_default_input_device_info() 120 | default_system_device_index = default_input_device_info['index'] 121 | default_host_api_index = default_input_device_info['hostApi'] 122 | logger.info(f"Found default input device directly: {default_input_device_info.get('name')} (Index: {default_system_device_index})") 123 | except Exception as e2: 124 | logger.warning(f"Could not get default input device info directly: {e2}. Will iterate all devices.") 125 | default_system_device_index = None 126 | 127 | device_count = self.p.get_device_count() 128 | selected_device_info = None 129 | 130 | for i in range(device_count): 131 | try: 132 | device_info = self.p.get_device_info_by_index(i) 133 | is_on_preferred_api = (device_info.get('hostApi') == default_host_api_index) 134 | has_input_channels = device_info.get('maxInputChannels', 0) >= self.channels 135 | 136 | if has_input_channels: 137 | if (default_host_api_index is not None and is_on_preferred_api) or \ 138 | (default_host_api_index is None): 139 | try: 140 | supported = self.p.is_format_supported( 141 | rate=self.sample_rate, 142 | input_device=device_info['index'], 143 | input_channels=self.channels, 144 | input_format=self.format 145 | ) 146 | if supported: 147 | self.input_devices.append(device_info) 148 | if default_system_device_index == i: 149 | self.input_device_index = i 150 | selected_device_info = device_info 151 | logger.info(f"Selecting system default input device: {device_info.get('name')} (Index: {i})") 152 | break 153 | elif self.input_device_index is None: 154 | self.input_device_index = device_info['index'] 155 | selected_device_info = device_info 156 | except ValueError: 157 | pass # Format not supported 158 | except Exception as dev_e: 159 | logger.warning(f"Could not query full device info for index {i}: {dev_e}") 160 | 161 | if self.input_device_index is None: 162 | final_error_msg = f"No compatible audio input devices found." 163 | logger.error(final_error_msg) 164 | raise RuntimeError(final_error_msg) 165 | else: 166 | if not selected_device_info: 167 | selected_device_info = self.p.get_device_info_by_index(self.input_device_index) 168 | logger.info(f"Using audio input device: {selected_device_info.get('name', 'N/A')} (Index: {self.input_device_index})") 169 | 170 | def setup_vad(self): 171 | logger.debug("Setting up VAD...") 172 | self.vad = Vad(3) 173 | self.frame_length = int(self.sample_rate * self.frame_duration / 1000) 174 | self.prev_frames_maxlen = int(self.prev_frames_duration * self.sample_rate / self.frame_length) 175 | self.min_silence_detections = int(self.silence_limit * 1000 / self.frame_duration) 176 | 177 | def set_transcript_callback(self, callback: Callable[[str, str], None]): 178 | self.transcript_callback = callback 179 | 180 | # --- WORKER THREAD METHODS --- 181 | 182 | def _background_worker(self): 183 | """CONSUMER for the background listener. Processes audio from its queue.""" 184 | while self.is_background_listening.is_set(): 185 | try: 186 | in_data = self.background_queue.get(timeout=0.5) 187 | if is_espeak_running(): 188 | continue 189 | audio_chunk = np.frombuffer(in_data, dtype=np.int16) 190 | self.prev_frames.append(audio_chunk.copy()) 191 | if len(self.prev_frames) > self.prev_frames_maxlen: 192 | self.prev_frames.pop(0) 193 | except queue.Empty: 194 | continue # This is normal, just loop again 195 | except Exception as e: 196 | logger.error(f"Error in background worker: {e}", exc_info=True) 197 | 198 | def _quick_record_worker(self): 199 | """CONSUMER for the quick recorder. Processes audio from its queue.""" 200 | while self.is_quick_recording.is_set() or not self.quick_record_queue.empty(): 201 | try: 202 | in_data = self.quick_record_queue.get(timeout=0.5) 203 | if is_espeak_running(): 204 | continue 205 | audio_chunk = np.frombuffer(in_data, dtype=np.int16) 206 | self.current_audio.append(audio_chunk.copy()) 207 | except queue.Empty: 208 | if not self.is_quick_recording.is_set(): 209 | break # Exit if recording is stopped and queue is empty 210 | except Exception as e: 211 | logger.error(f"Error in quick record worker: {e}", exc_info=True) 212 | 213 | # --- CALLBACK METHODS (PRODUCERS) --- 214 | 215 | def background_callback(self, in_data, frame_count, time_info, status_flags): 216 | """PRODUCER: Puts background audio data into a queue. Must be fast.""" 217 | if status_flags: 218 | logger.warning(f"Background audio input status flags non-zero: {status_flags}") 219 | self.background_queue.put(in_data) 220 | return (None, pyaudio.paContinue) 221 | 222 | def quick_record_callback(self, in_data, frame_count, time_info, status_flags): 223 | """PRODUCER: Puts quick record audio data into a queue. Must be fast.""" 224 | if status_flags: 225 | logger.warning(f"Quick record audio input status flags non-zero: {status_flags}") 226 | self.quick_record_queue.put(in_data) 227 | return (None, pyaudio.paContinue) 228 | 229 | # --- STREAM CONTROL METHODS --- 230 | 231 | def start_background_listener(self): 232 | if self.is_background_listening.is_set(): 233 | return 234 | logger.debug("Starting background listener...") 235 | self.is_background_listening.set() 236 | 237 | self.background_worker_thread = threading.Thread(target=self._background_worker) 238 | self.background_worker_thread.start() 239 | 240 | self.background_stream = self.p.open( 241 | format=self.format, channels=self.channels, rate=self.sample_rate, 242 | input=True, input_device_index=self.input_device_index, 243 | frames_per_buffer=self.chunk_size * 2, # A slightly larger buffer is safe 244 | stream_callback=self.background_callback 245 | ) 246 | logger.debug("Background listener stream started.") 247 | 248 | def stop_background_listener(self): 249 | if not self.is_background_listening.is_set(): 250 | return 251 | logger.debug("Stopping background listener...") 252 | self.is_background_listening.clear() 253 | 254 | if self.background_stream: 255 | self.background_stream.stop_stream() 256 | self.background_stream.close() 257 | self.background_stream = None 258 | 259 | if self.background_worker_thread: 260 | self.background_worker_thread.join() 261 | self.background_worker_thread = None 262 | logger.debug("Background listener stopped.") 263 | 264 | def start_quick_record(self): 265 | if self.is_quick_recording.is_set(): 266 | return False 267 | logger.info("Attempting to start quick recording...") 268 | self.stop_background_listener() 269 | 270 | self.is_quick_recording.set() 271 | self.current_audio = list(self.prev_frames) 272 | logger.debug(f"Initialized quick recording with {len(self.current_audio)} frames.") 273 | 274 | self.quick_record_worker_thread = threading.Thread(target=self._quick_record_worker) 275 | self.quick_record_worker_thread.start() 276 | 277 | self.quick_record_stream = self.p.open( 278 | format=self.format, channels=self.channels, rate=self.sample_rate, 279 | input=True, input_device_index=self.input_device_index, 280 | frames_per_buffer=self.chunk_size, 281 | stream_callback=self.quick_record_callback 282 | ) 283 | logger.info("Quick recording stream started successfully.") 284 | return True 285 | 286 | def stop_quick_record(self): 287 | if not self.is_quick_recording.is_set(): 288 | return 289 | logger.info("Stopping quick recording...") 290 | self.is_quick_recording.clear() 291 | 292 | if self.quick_record_stream: 293 | self.quick_record_stream.stop_stream() 294 | self.quick_record_stream.close() 295 | self.quick_record_stream = None 296 | 297 | if self.quick_record_worker_thread: 298 | self.quick_record_worker_thread.join() # Wait for worker to process all data 299 | self.quick_record_worker_thread = None 300 | 301 | if self.current_audio: 302 | try: 303 | combined_audio_data = np.concatenate(self.current_audio) 304 | self._schedule_process_speech(combined_audio_data) 305 | except ValueError as e: 306 | logger.error(f"Error concatenating audio chunks: {e}", exc_info=True) 307 | finally: 308 | self.current_audio = [] 309 | else: 310 | logger.info("No audio was captured.") 311 | if self.transcript_callback: 312 | self.loop.call_soon_threadsafe(self.transcript_callback, "...", "Voice") 313 | 314 | self.start_background_listener() # Restart background listener 315 | logger.debug("Background listener restarted.") 316 | 317 | # --- ASYNC SPEECH PROCESSING --- 318 | 319 | def _schedule_process_speech(self, audio_data: np.ndarray): 320 | """Schedules the async process_speech method in the main event loop.""" 321 | if self.loop and self.loop.is_running(): 322 | self.loop.call_soon_threadsafe( 323 | lambda: self.loop.create_task(self.process_speech(audio_data)) 324 | ) 325 | 326 | async def process_speech(self, audio_data: np.ndarray): 327 | """Transcribes speech and triggers command processing.""" 328 | # This async logic is well-structured. No changes needed here. 329 | if not self.asr_processor or not self.command_trigger_func: 330 | logger.error("ASR processor or command trigger not initialized.") 331 | return 332 | 333 | try: 334 | transcribed_text = await self.asr_processor.transcribe(audio_data) 335 | 336 | if self.transcript_callback: 337 | text_to_print = transcribed_text if transcribed_text else "..." 338 | self.transcript_callback(text_to_print, "Voice") 339 | 340 | if transcribed_text: 341 | logger.info(f"Transcription successful: '{transcribed_text}'. Triggering command.") 342 | await self.command_trigger_func(transcribed_text) 343 | else: 344 | logger.info("Transcription returned no text.") 345 | except Exception as e: 346 | logger.error(f"Error during async speech processing: {e}", exc_info=True) 347 | if self.transcript_callback: 348 | self.transcript_callback("[Error processing speech]", "Error") 349 | 350 | def cleanup(self): 351 | """Cleans up all resources.""" 352 | logger.info("Cleaning up VoiceCommandSystem resources...") 353 | self.stop_quick_record() 354 | self.stop_background_listener() 355 | 356 | if self.p: 357 | self.p.terminate() 358 | self.p = None 359 | logger.info("Voice command system resources released.") 360 | -------------------------------------------------------------------------------- /commands/computer_command.py: -------------------------------------------------------------------------------- 1 | # commands/computer_command.py 2 | import subprocess 3 | import json 4 | from typing import AsyncGenerator, Dict, List, Optional 5 | import logging 6 | import os 7 | from pathlib import Path 8 | from .base import Command 9 | import aiohttp 10 | import asyncio 11 | import shutil 12 | 13 | from cli.output import schedule_print, speak 14 | 15 | logger = logging.getLogger(__name__) 16 | logger.setLevel(logging.DEBUG) 17 | 18 | # --- is_tool Helper --- 19 | def is_tool(name): 20 | return shutil.which(name) is not None 21 | 22 | # --- ToolRegistry Class (No changes needed from previous version) --- 23 | class ToolRegistry: 24 | def __init__(self): 25 | self.apps: Dict[str, str] = {} 26 | self.active_windows: Dict[str, str] = {} 27 | self.terminal_apps = ['konsole', 'gnome-terminal', 'xterm', 'terminator', 'alacritty', 'kitty'] 28 | self.command_history = [] 29 | self.update_installed_apps() 30 | self.update_active_windows() 31 | 32 | def update_installed_apps(self): 33 | try: 34 | result = subprocess.run(['find', '/usr/share/applications', '-name', '*.desktop'], capture_output=True, text=True, check=False) 35 | if result.returncode != 0: 36 | logger.warning(f"Finding .desktop files failed: {result.stderr}") 37 | return 38 | self.apps.clear() 39 | for desktop_file in result.stdout.splitlines(): 40 | try: 41 | with open(desktop_file, 'r', encoding='utf-8', errors='ignore') as f: 42 | content = f.read() 43 | name = None 44 | exec_path = None 45 | nodisplay = False 46 | for line in content.splitlines(): 47 | if line.startswith('Name='): 48 | name = line.split('=', 1)[1].lower().strip() 49 | elif line.startswith('Exec='): 50 | exec_path = line.split('=', 1)[1].split('%')[0].strip() 51 | elif line.startswith('NoDisplay=true'): 52 | nodisplay = True 53 | if name and exec_path and not nodisplay: 54 | cmd_base = exec_path.split()[0] 55 | if '/' in cmd_base or is_tool(cmd_base): 56 | self.apps[name] = exec_path 57 | except Exception as file_e: 58 | logger.warning(f"Could not parse desktop file {desktop_file}: {file_e}") 59 | except Exception as e: 60 | logging.error(f"Failed to update installed apps: {e}", exc_info=True) 61 | 62 | def update_active_windows(self): 63 | if not is_tool('wmctrl'): 64 | logger.error("wmctrl not found.") 65 | self.active_windows.clear() 66 | return 67 | try: 68 | result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=False) 69 | self.active_windows.clear() 70 | if result.returncode != 0: 71 | logger.warning(f"wmctrl command failed: {result.stderr}.") 72 | return 73 | for line in result.stdout.splitlines(): 74 | parts = line.split(None, 3) 75 | if len(parts) >= 4: 76 | self.active_windows[parts[0]] = parts[3].lower() 77 | except Exception as e: 78 | logging.error(f"Failed to update active windows: {e}") 79 | self.active_windows.clear() 80 | 81 | def find_app(self, query: str) -> Optional[str]: 82 | query = query.lower() 83 | if query in ['shell', 'terminal', 'command prompt', 'cmd']: 84 | for terminal in self.terminal_apps: 85 | if terminal in self.apps: 86 | return self.apps[terminal] 87 | if terminal_exec := self.apps.get(terminal.split()[0]): 88 | return terminal_exec 89 | logger.warning("Could not find a known terminal application.") 90 | return None 91 | if query in self.apps: 92 | return self.apps[query] 93 | for name, exec_path in self.apps.items(): 94 | if query in name: 95 | logger.debug(f"Partial match for '{query}': Found '{name}' -> {exec_path}") 96 | return exec_path 97 | logger.debug(f"No application found matching query: '{query}'") 98 | return None 99 | 100 | def find_window(self, query: str) -> Optional[str]: 101 | query = query.lower() 102 | if not self.active_windows: 103 | logger.warning("Active window list empty.") 104 | return None 105 | for window_id, title in self.active_windows.items(): 106 | if query == title: 107 | return window_id 108 | best_match_id = None 109 | best_match_score = 0 110 | for window_id, title in self.active_windows.items(): 111 | if query in title: 112 | score = 100 - len(title) 113 | score += 50 if title.startswith(query) else 0 114 | if score > best_match_score: 115 | best_match_id = window_id 116 | best_match_score = score 117 | if best_match_id: 118 | logger.debug(f"Window match for '{query}': Found '{self.active_windows[best_match_id]}' (ID: {best_match_id})") 119 | return best_match_id 120 | logger.debug(f"No active window found matching query: '{query}'") 121 | return None 122 | 123 | def add_command_history(self, command: str): 124 | self.command_history.append(command) 125 | del self.command_history[:-10] 126 | 127 | 128 | # --- ComputerCommand Class --- 129 | class ComputerCommand(Command): 130 | def __init__(self): 131 | super().__init__( 132 | name="computer", 133 | aliases=[], 134 | description="Execute various computer commands using LLM", 135 | execute=self._execute 136 | ) 137 | self.tools = ToolRegistry() 138 | self.llm_model = "mistral" 139 | self.ollama_base_url = "http://localhost:11434" 140 | 141 | # --- Prompts (remain same) --- 142 | self.query_prompt = """Context of highlighted text: "{highlighted}" 143 | Now for the User Query: "{query}" 144 | Analyze the highlighted text and answer the query. Keep responses clear and concise. If the query isn't directly related to the highlighted text, just answer the qestion.""" 145 | 146 | self.shell_prompt = """You are a desktop command assistant that outputs ONLY a single BASH command suitable for execution via subprocess.run. 147 | Rules: 148 | 1. Task Handling: If the request asks for information obtainable via a bash command (e.g., disk space, list files, current directory), output the command. If the request is a general question or cannot be answered by a simple command, respond conversationally using ONLY plain text (no command output). Start conversational responses with 'ANSWER:'. Provide ONLY the command itself (e.g., `ls -l`) or the conversational answer (e.g., `ANSWER: I cannot perform that action.`). Do NOT add explanations before the command or ANSWER:. 149 | 2. Safety: AVOID destructive commands (rm, mv without care, mkfs, etc.). Prefer read-only commands (ls, pwd, df, ps, top, cat, head, tail, grep, find). Do NOT create files or directories unless specifically asked and safe. Do NOT include `sudo`. Do NOT include `&& espeak ...`. 150 | 3. Formatting: Output exactly ONE line containing either the bash command or the `ANSWER:` prefixed conversational response. Remove any markdown formatting like backticks (`). 151 | Examples: User: "check disk space" -> Assistant: df -h | User: "show current directory" -> Assistant: pwd | User: "list files" -> Assistant: ls -lah | User: "what is the capital of france" -> Assistant: ANSWER: The capital of France is Paris. | User: "delete all my files" -> Assistant: ANSWER: I cannot perform destructive actions like deleting all files. 152 | Current state (informational only, do not rely on for paths): Working Directory (approximated): {working_dir} | Previous Commands (for context): {command_history} 153 | User request: {query} 154 | Assistant:""" 155 | 156 | self.tool_prompts = { 157 | 'open': """You are an application launcher assistant. Output ONLY an app_name tag or NOT_FOUND. Match the user request against the list of installed applications. For shell/terminal, use an installed emulator name. 158 | Installed applications: {apps} 159 | User request: {query} 160 | Assistant:""", 161 | 'window': """You are a window management assistant. Output ONLY a window_query or window_query tag, or NOT_FOUND. Match the user request against the list of active windows. Use lowercase keywords. 162 | Active windows (lowercase titles): {windows} 163 | User request: {query} 164 | Assistant:""" 165 | } 166 | 167 | # get_available_models (remains same) 168 | async def get_available_models(self) -> Optional[List[str]]: 169 | url = f"{self.ollama_base_url}/api/tags" 170 | logger.debug(f"Fetching models from {url}") 171 | try: 172 | async with aiohttp.ClientSession() as session: 173 | async with session.get(url, timeout=5) as response: 174 | if response.status == 200: 175 | data = await response.json() 176 | models_data = data.get("models", []) 177 | model_names = sorted([m.get("name") for m in models_data if m.get("name")]) 178 | logger.info(f"Fetched models: {model_names}") 179 | return model_names if model_names else None 180 | else: 181 | logger.error(f"Ollama API model request failed: {response.status} - {await response.text()}") 182 | return None 183 | except (aiohttp.ClientConnectorError, asyncio.TimeoutError) as e: 184 | logger.error(f"Cannot connect to Ollama at {url}: {e}") 185 | return None 186 | except Exception as e: 187 | logger.error(f"Error fetching Ollama models: {e}", exc_info=True) 188 | return None 189 | 190 | # set_llm_model (remains same) 191 | def set_llm_model(self, model_name): 192 | self.llm_model = model_name 193 | logger.info(f"ComputerCommand LLM model set to: {model_name}") 194 | 195 | # _execute (FIXED version) 196 | async def _execute(self, text: str) -> None: 197 | logger.debug(f"ComputerCommand executing with text: '{text}'") 198 | try: 199 | tool_type = self._determine_tool_type(text) 200 | logger.debug(f"Determined tool type: {tool_type}") 201 | 202 | if tool_type == 'query': 203 | await self._handle_text_query(text) 204 | elif tool_type == 'shell': 205 | command_query = text.lower().replace('shell', '', 1).strip() 206 | await self._handle_shell_command(command_query) 207 | else: # 'open' or 'window' 208 | llm_response_tag = "[No response]" 209 | # Fix: Properly structure the async for loop 210 | async for resp in self._get_llm_tool_response(text, tool_type): 211 | llm_response_tag = resp 212 | break 213 | 214 | if llm_response_tag.startswith("[Error:") or llm_response_tag == "[No response]": 215 | schedule_print("Error", f"LLM failed for '{tool_type}': {llm_response_tag}") 216 | return 217 | 218 | tool_name, params = self._parse_tool(llm_response_tag) 219 | logger.debug(f"Parsed tool: {tool_name}, Params: {params}") 220 | 221 | if not tool_name: 222 | schedule_print("Error", f"LLM gave invalid tool response: {llm_response_tag}") 223 | return 224 | 225 | if params is not None and params.upper() == "NOT_FOUND": 226 | msg = f"Could not find target for {tool_type} '{text}'." 227 | schedule_print("System", msg) 228 | return 229 | 230 | await self._execute_tool(tool_name, params if params is not None else "") 231 | except Exception as e: 232 | error_msg = f"ComputerCommand execution failed: {str(e)}" 233 | logger.error(error_msg, exc_info=True) 234 | schedule_print("Error", error_msg) 235 | 236 | # _determine_tool_type (remains same) 237 | def _determine_tool_type(self, query: str) -> str: 238 | query_lower = query.lower().strip() 239 | if query_lower.startswith('shell'): 240 | return 'shell' 241 | if any(query_lower.startswith(v + " ") for v in ['open', 'launch', 'start', 'run']): 242 | return 'open' 243 | if any(query_lower.startswith(v + " ") for v in ['goto', 'go to', 'switch to', 'focus', 'close', 'quit', 'exit']): 244 | self.tools.update_active_windows() 245 | return 'window' 246 | logger.debug(f"Query '{query}' classified as 'query' type.") 247 | return 'query' 248 | 249 | # _handle_shell_command (remains same as last correction) 250 | async def _handle_shell_command(self, command_query: str) -> None: 251 | if not command_query: 252 | schedule_print("System", "No shell command requested.") 253 | return 254 | logger.debug("Handling shell command request...") 255 | history_context = "\n".join(self.tools.command_history[-3:]) 256 | current_dir = os.getcwd() 257 | prompt = self.shell_prompt.format(working_dir=current_dir, command_history=history_context, query=command_query) 258 | 259 | llm_output_line = "[No response]" 260 | async for resp in self._ollama_generate(prompt, stream=False): 261 | llm_output_line = resp 262 | break 263 | 264 | if llm_output_line.startswith("[Error:") or llm_output_line == "[No response]": 265 | schedule_print("Error", f"LLM failed for shell command: {llm_output_line}") 266 | return 267 | 268 | llm_output_line = llm_output_line.strip() 269 | logger.debug(f"LLM output for shell: '{llm_output_line}'") 270 | 271 | if llm_output_line.startswith("ANSWER:"): 272 | answer_text = llm_output_line[len("ANSWER:"):].strip() 273 | schedule_print("LLM", answer_text) 274 | if answer_text: 275 | await speak(answer_text) 276 | elif not llm_output_line: 277 | schedule_print("System", "LLM returned empty response.") 278 | else: 279 | command_to_run = llm_output_line 280 | schedule_print("System", f"Suggested command: `{command_to_run}`") 281 | schedule_print("System", f"Executing...") 282 | try: 283 | loop = asyncio.get_running_loop() 284 | result = await loop.run_in_executor( 285 | None, 286 | lambda: subprocess.run( 287 | command_to_run, 288 | shell=True, 289 | capture_output=True, 290 | text=True, 291 | check=False, 292 | timeout=15 293 | ) 294 | ) 295 | output = f"Return Code: {result.returncode}\n" 296 | stdout_clean = result.stdout.strip() 297 | stderr_clean = result.stderr.strip() 298 | if stdout_clean: 299 | output += f"Output:\n{stdout_clean}\n" 300 | if stderr_clean: 301 | output += f"Error Output:\n{stderr_clean}" 302 | schedule_print("System", output.strip()) 303 | self.tools.add_command_history(command_to_run) 304 | except subprocess.TimeoutExpired: 305 | msg = f"Command timed out: `{command_to_run}`" 306 | schedule_print("Error", msg) 307 | except Exception as exec_e: 308 | msg = f"Failed to execute command `{command_to_run}`: {exec_e}" 309 | schedule_print("Error", msg) 310 | 311 | # _handle_text_query (remains same as last correction) 312 | async def _handle_text_query(self, query: str) -> None: 313 | try: 314 | logger.debug("Getting highlighted text...") 315 | result = subprocess.run(['xclip', '-o', '-selection', 'primary'], capture_output=True, text=True, check=False) 316 | if result.returncode != 0: 317 | msg = "Could not get highlighted text." if "Error:" in result.stderr else "No text highlighted." 318 | schedule_print("System", msg) 319 | return 320 | 321 | highlighted = result.stdout.strip() 322 | if not highlighted: 323 | schedule_print("System", "No text is highlighted.") 324 | return 325 | 326 | logger.debug(f"Processing query: '{query}' with context: '{highlighted[:100]}...'") 327 | prompt = self.query_prompt.format(highlighted=highlighted, query=query) 328 | full_response_for_log = "" 329 | 330 | try: 331 | loop = asyncio.get_running_loop() 332 | async for chunk_text in self._ollama_generate(prompt, stream=True): 333 | schedule_print("LLM", chunk_text) 334 | full_response_for_log += chunk_text 335 | speak_text = chunk_text.strip() 336 | if speak_text and not speak_text.startswith("[Error:"): 337 | await speak(speak_text) 338 | finally: 339 | logger.debug(f"Full LLM response for query '{query}': {full_response_for_log}") 340 | except FileNotFoundError: 341 | msg = "Error: 'xclip' command not found." 342 | logger.error(msg) 343 | schedule_print("Error", msg) 344 | except Exception as e: 345 | error_msg = f"Query processing failed: {str(e)}" 346 | logger.error(error_msg, exc_info=True) 347 | schedule_print("Error", error_msg) 348 | 349 | # _get_llm_tool_response (remains same) 350 | async def _get_llm_tool_response(self, query: str, tool_type: str) -> AsyncGenerator[str, None]: 351 | prompt_template = self.tool_prompts.get(tool_type) 352 | if not prompt_template: 353 | logger.error(f"No prompt for tool {tool_type}") 354 | yield f"[Error: No prompt]" 355 | return 356 | 357 | self.tools.update_active_windows() 358 | apps_list = "\n".join(self.tools.apps.keys()) 359 | windows_list = "\n".join(self.tools.active_windows.values()) 360 | prompt = prompt_template.format(apps=apps_list or "None", windows=windows_list or "None", query=query) 361 | logger.debug(f"Prompt for {tool_type}:\n{prompt}") 362 | 363 | async for response_text in self._ollama_generate(prompt, stream=False): 364 | logger.debug(f"LLM response for {tool_type}: {response_text}") 365 | yield response_text 366 | 367 | # _parse_tool (remains same) 368 | def _parse_tool(self, response: str) -> tuple[Optional[str], Optional[str]]: 369 | response = response.strip() 370 | for tool in ['open', 'goto', 'close', 'shell']: 371 | start_tag = f"<{tool}>" 372 | end_tag = f"" 373 | if response.startswith(start_tag) and response.endswith(end_tag): 374 | params = response[len(start_tag):-len(end_tag)].strip() 375 | return (tool, params) if params else (None, None) 376 | logger.debug(f"Could not parse tool tag from: {response}") 377 | return None, None 378 | 379 | # _execute_tool (remains same) 380 | async def _execute_tool(self, tool_type: str, params: str) -> None: 381 | """Executes the selected tool, printing status.""" 382 | logger.info(f"Executing tool '{tool_type}' with params '{params}'") 383 | # Initial status message (printed immediately) 384 | schedule_print("System", f"Attempting action: {tool_type} '{params}'...") 385 | status_msg = f"Tool '{tool_type}' finished." # Default success 386 | error_occurred = False 387 | 388 | try: 389 | if tool_type == 'open': 390 | exec_path = self.tools.find_app(params) 391 | # <<< FIX: Expanded if/try/except/else block >>> 392 | if exec_path: 393 | try: 394 | # Use Popen for non-blocking GUI app launch 395 | subprocess.Popen(exec_path.split(), start_new_session=True) 396 | status_msg = f"Attempted launch: '{params}' (Command: {exec_path})." 397 | except Exception as e: 398 | status_msg = f"Error launching '{params}': {e}" 399 | error_occurred = True 400 | else: 401 | status_msg = f"Application matching '{params}' not found." 402 | # This isn't strictly an error, but a failure condition 403 | error_occurred = True # Treat as error for printing logic 404 | 405 | elif tool_type == 'goto': 406 | self.tools.update_active_windows() 407 | window_id = self.tools.find_window(params) 408 | # <<< FIX: Expanded if/try/except/else block >>> 409 | if window_id: 410 | try: 411 | # Run wmctrl synchronously (usually fast) 412 | subprocess.run(['wmctrl', '-i', '-a', window_id], check=True, timeout=3) 413 | status_msg = f"Focused window matching '{params}'." 414 | except Exception as e: 415 | status_msg = f"Error focusing '{params}': {e}" 416 | error_occurred = True 417 | else: 418 | status_msg = f"Window matching '{params}' not found." 419 | error_occurred = True 420 | 421 | elif tool_type == 'close': 422 | self.tools.update_active_windows() 423 | window_id = self.tools.find_window(params) 424 | # <<< FIX: Expanded if/try/except/else block >>> 425 | if window_id: 426 | try: 427 | subprocess.run(['wmctrl', '-i', '-c', window_id], check=True, timeout=3) 428 | status_msg = f"Closed window matching '{params}'." 429 | except Exception as e: 430 | status_msg = f"Error closing '{params}': {e}" 431 | error_occurred = True 432 | else: 433 | status_msg = f"Window matching '{params}' not found." 434 | error_occurred = True 435 | else: 436 | status_msg = f"Unknown tool type: {tool_type}" 437 | error_occurred = True 438 | 439 | except Exception as e: 440 | # Catch unexpected errors during tool logic 441 | status_msg = f"Tool execution failed unexpectedly: {str(e)}" 442 | logger.error(status_msg, exc_info=True) 443 | error_occurred = True 444 | 445 | # Print final status message using appropriate type 446 | schedule_print("Error" if error_occurred else "System", status_msg) 447 | # Optionally speak success/failure here? 448 | # if not error_occurred: await speak(status_msg) 449 | 450 | # _ollama_generate (remains same as last correction) 451 | async def _ollama_generate(self, prompt: str, stream: bool) -> AsyncGenerator[str, None]: 452 | url = f"{self.ollama_base_url}/api/generate" 453 | payload = { 454 | "model": self.llm_model, 455 | "prompt": prompt, 456 | "stream": stream 457 | } 458 | try: 459 | async with aiohttp.ClientSession() as session: 460 | async with session.post(url, json=payload, timeout=60) as response: 461 | if response.status != 200: 462 | error_text = await response.text() 463 | logger.error(f"Ollama API error {response.status}: {error_text}") 464 | yield f"[Error: API {response.status}]" 465 | return 466 | if stream: 467 | buffer = "" 468 | async for line in response.content: 469 | if line: 470 | try: 471 | decoded_line = line.decode('utf-8') 472 | data = json.loads(decoded_line) 473 | chunk_text = data.get('response', '') 474 | if chunk_text: 475 | buffer += chunk_text 476 | while True: 477 | try: 478 | split_idx = min(idx for idx in (buffer.find('.'), buffer.find('!'), buffer.find('?'), buffer.find('\n')) if idx != -1) 479 | yield buffer[:split_idx+1] 480 | buffer = buffer[split_idx+1:] 481 | except ValueError: 482 | break 483 | if data.get('done'): 484 | if buffer: 485 | yield buffer 486 | break 487 | except json.JSONDecodeError: 488 | logger.warning(f"Failed decode: {line}") 489 | yield "[Error: Invalid JSON]" 490 | except Exception as stream_e: 491 | logger.error(f"Stream err: {stream_e}") 492 | yield f"[Error: Stream]" 493 | return 494 | else: # Non-streaming 495 | try: 496 | data = await response.json() 497 | response_text = data.get('response', '').strip() 498 | yield response_text if response_text else "[Warning: LLM empty]" 499 | except json.JSONDecodeError: 500 | logger.error(f"Non-stream JSON decode failed.") 501 | yield "[Error: Invalid LLM JSON]" 502 | return 503 | except aiohttp.ClientConnectorError as e: 504 | logger.error(f"Ollama Connect {self.ollama_base_url}: {e}") 505 | yield "[Error: Cannot connect]" 506 | except asyncio.TimeoutError: 507 | logger.error(f"Ollama Timeout") 508 | yield "[Error: Ollama timeout]" 509 | except Exception as e: 510 | logger.error(f"Ollama generate call failed: {e}", exc_info=True) 511 | yield f"[Error: LLM call failed]" 512 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | import sys 5 | import logging 6 | from typing import List, Optional, Callable, Any, Coroutine 7 | import textwrap 8 | import subprocess 9 | import re # <<< For input normalization 10 | 11 | # Third-party imports 12 | from prompt_toolkit import PromptSession 13 | from prompt_toolkit.patch_stdout import patch_stdout 14 | # Optional: For history and suggestions 15 | # from prompt_toolkit.history import FileHistory 16 | # from prompt_toolkit.auto_suggest import AutoSuggestFromHistory 17 | 18 | # Project imports 19 | from cli.output import schedule_print, print_consumer, print_queue, safe_print, speak 20 | from cli.completer import CLICompleter, ollama_models_for_completion 21 | import hotkey_listener # Keep import 22 | from commands.command_processor import CommandProcessor 23 | from commands.computer_command import ComputerCommand # Keep direct import if needed 24 | from core.voice_system import VoiceCommandSystem 25 | 26 | # Configure logging 27 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 28 | logger = logging.getLogger(__name__) 29 | 30 | # --- Global Variables --- 31 | ollama_models_list: List[str] = ["mistral"] # Default model 32 | computer_command_instance: Optional[ComputerCommand] = None 33 | # --- Global for tracking the current command task --- 34 | current_command_task: Optional[asyncio.Task] = None 35 | 36 | # --- Accessor function for the hotkey listener --- 37 | def get_current_task() -> Optional[asyncio.Task]: 38 | """Returns the currently active command task, if any.""" 39 | global current_command_task 40 | return current_command_task 41 | 42 | # --- Voice Command Handler --- 43 | async def handle_voice_command(text: str, command_processor: CommandProcessor): 44 | """Processes voice commands, manages the task, handles cancellation.""" 45 | global current_command_task 46 | if current_command_task and not current_command_task.done(): 47 | logger.warning("New voice command received while previous task running. Cancelling previous.") 48 | current_command_task.cancel() 49 | try: 50 | # Give cancellation a chance to propagate 51 | await asyncio.wait_for(current_command_task, timeout=0.5) 52 | except asyncio.CancelledError: 53 | logger.debug("Previous voice task cancelled successfully.") 54 | except asyncio.TimeoutError: 55 | logger.warning("Timeout waiting for previous voice task cancellation.") 56 | except Exception as e: 57 | logger.error(f"Error awaiting previous cancelled voice task: {e}") 58 | finally: 59 | current_command_task = None # Explicitly clear 60 | 61 | logger.info(f"Processing voice command: {text}") 62 | # Create a task for the command processing using the helper 63 | current_command_task = asyncio.create_task( 64 | _execute_command_stream(text, command_processor, "Voice") 65 | ) 66 | 67 | try: 68 | await current_command_task 69 | except asyncio.CancelledError: 70 | # Message is printed by the interrupt handler or the task itself 71 | logger.info("Voice command task was cancelled.") 72 | except Exception as e: 73 | logger.error(f"Error executing voice command task '{text}': {e}", exc_info=True) 74 | schedule_print("Error", f"Failed processing voice command: {e}") 75 | finally: 76 | # Ensure the task reference is cleared after completion/cancellation 77 | if current_command_task and current_command_task.done(): 78 | current_command_task = None 79 | 80 | # --- Transcript Callback (for printing only) --- 81 | def handle_transcript(text: str, source: str = "Voice"): 82 | """Callback from Voice System. Schedules print updates.""" 83 | schedule_print(source, text) 84 | 85 | # --- Typed Command Processing --- 86 | async def process_typed_command(text: str, command_processor: CommandProcessor): 87 | """Processes commands entered via the CLI, manages the task, handles cancellation.""" 88 | global current_command_task 89 | if current_command_task and not current_command_task.done(): 90 | logger.warning("New typed command received while previous task running. Cancelling previous.") 91 | current_command_task.cancel() 92 | try: 93 | # Give cancellation a chance to propagate 94 | await asyncio.wait_for(current_command_task, timeout=0.5) 95 | except asyncio.CancelledError: 96 | logger.debug("Previous typed task cancelled successfully.") 97 | except asyncio.TimeoutError: 98 | logger.warning("Timeout waiting for previous typed task cancellation.") 99 | except Exception as e: 100 | logger.error(f"Error awaiting previous cancelled typed task: {e}") 101 | finally: 102 | current_command_task = None # Explicitly clear 103 | 104 | 105 | schedule_print("Typed", text) 106 | # Create a task for the command processing using the helper 107 | current_command_task = asyncio.create_task( 108 | _execute_command_stream(text, command_processor, "Typed") 109 | ) 110 | 111 | try: 112 | await current_command_task 113 | except asyncio.CancelledError: 114 | # Message is printed by the interrupt handler or the task itself 115 | logger.info("Typed command task was cancelled.") 116 | except Exception as e: 117 | logger.error(f"Error executing typed command task '{text}': {e}", exc_info=True) 118 | schedule_print("Error", f"Failed processing command: {e}") 119 | finally: 120 | # Ensure the task reference is cleared after completion/cancellation 121 | if current_command_task and current_command_task.done(): 122 | current_command_task = None 123 | 124 | # --- Helper for executing commands and handling streams/speaking --- 125 | async def _execute_command_stream(text: str, command_processor: CommandProcessor, source: str): 126 | """Internal helper to run command processor and handle output/speech.""" 127 | original_cmd_name = None # Keep track of the originally matched command 128 | try: 129 | # <<< Normalize the input text FIRST >>> 130 | # Lowercase, strip leading/trailing whitespace 131 | normalized_text = text.lower().strip() 132 | # Remove common trailing punctuation (periods, question marks, exclamation points) 133 | # using regex for cleaner removal than multiple .rstrip() calls. 134 | normalized_text = re.sub(r'[.!?]+$', '', normalized_text).strip() 135 | # Example: "Read." -> "read", "Computer what is this?" -> "computer what is this" 136 | # Keep the original 'text' variable intact in case we need it for typing. 137 | logger.debug(f"Original text: '{text}', Normalized for parsing: '{normalized_text}'") 138 | # <<< END Normalization >>> 139 | 140 | # Parse the NORMALIZED text to find the command 141 | cmd_name, args = command_processor.parse_command(normalized_text) 142 | original_cmd_name = cmd_name # Store the matched command name 143 | 144 | processor_input = normalized_text # Start with normalized for command execution by default 145 | is_general_query = False 146 | 147 | if cmd_name: 148 | # --- Specific command matched --- 149 | schedule_print("System", f"Executing: {cmd_name} {args if args else ''}") 150 | # processor_input is already normalized_text, which includes args if found by parser 151 | is_general_query = (cmd_name == "computer") 152 | else: 153 | # --- No specific command matched: Treat as Type command --- 154 | logger.info(f"No command keyword matched for normalized input: '{normalized_text}'. Treating as 'type' command.") 155 | schedule_print("System", f"No command matched. Typing...") # Update printed message 156 | 157 | # IMPORTANT: Use the ORIGINAL, un-normalized 'text' for typing 158 | # so that punctuation and capitalization are preserved. 159 | processor_input = f"type {text}" 160 | original_cmd_name = "type" # Update for speaking logic 161 | is_general_query = False 162 | 163 | # --- Execute the command (either original or the reconstructed 'type' command) --- 164 | # Pass the appropriate input (normalized for commands, reconstructed 'type' for fallback) 165 | async for result in command_processor.process_command(processor_input): 166 | # Determine message type 167 | msg_type = "LLM" if is_general_query else "System" 168 | schedule_print(msg_type, f"{result}") # Always print the result 169 | 170 | # --- Speaking Logic --- 171 | should_speak = False # Default to not speaking 172 | if result and not isinstance(result, (list, dict)): 173 | result_str = str(result) 174 | # Speak only if it's NOT from 'read', 'computer', 'type', 'stop', etc. 175 | # And not an error or common status message. 176 | if original_cmd_name not in ["read", "computer", "type", "stop"] and \ 177 | not result_str.startswith("[Error:") and \ 178 | not result_str.startswith("Suggested command:") and \ 179 | not result_str.startswith("Attempting action:") and \ 180 | not result_str.startswith("Interrupted") and \ 181 | not result_str.startswith("Finished reading") and \ 182 | not result_str.startswith("Typed:") and \ 183 | not result_str.startswith("Read command executed.") and \ 184 | not result_str.startswith("No command matched."): 185 | should_speak = True 186 | 187 | if should_speak: 188 | await speak(result_str) # Call global speak function 189 | # --- End Speaking Logic --- 190 | 191 | except asyncio.CancelledError: 192 | # Log cancellation specifically for this execution context 193 | logger.info(f"Command execution for '{text}' cancelled within stream helper.") 194 | # Don't schedule print here, handled by caller or interrupt handler 195 | raise # Re-raise cancellation error to be caught by the caller 196 | except Exception as e: 197 | # Log and schedule print for errors during command execution itself 198 | logger.error(f"Error processing command '{text}' in _execute_command_stream: {e}", exc_info=True) 199 | schedule_print("Error", f"Failed processing command: {e}") 200 | # Don't re-raise normal exceptions, let the task finish with error logged. 201 | 202 | 203 | # --- Help Text Generation --- 204 | def generate_help_text(command_processor: CommandProcessor) -> str: 205 | """Generates help text dynamically from registered commands.""" 206 | lines = []; lines.append("Available commands:") 207 | details = command_processor.get_command_details(); max_len = 0 208 | if details: 209 | # Calculate max length for alignment 210 | for name, aliases, _ in details: 211 | alias_str = f" ({', '.join(aliases)})" if aliases else "" 212 | max_len = max(max_len, len(name) + len(alias_str)) 213 | static_cmds = ["select model [name]", "refresh_models", "help", "exit / quit"] # Removed 'stop' 214 | if static_cmds: # Avoid error if list is empty 215 | max_len = max(max_len, max(len(s) for s in static_cmds)) 216 | 217 | indent = " "; padding = 2; desc_width = 70 # Adjust desc_width if needed 218 | 219 | # Add registered commands details 220 | for name, aliases, description in sorted(details, key=lambda x: x[0]): # Sort commands alphabetically 221 | alias_str = f" ({', '.join(aliases)})" if aliases else "" 222 | command_part = f"{indent}{name}{alias_str}".ljust(max_len + len(indent) + padding) 223 | # Wrap description 224 | wrapped_desc = textwrap.wrap(description or "No description.", width=desc_width) 225 | # Add first line of description (or only line) 226 | lines.append(f" {command_part} {wrapped_desc[0] if wrapped_desc else ''}") 227 | # Add subsequent lines of description indented 228 | for line in wrapped_desc[1:]: 229 | lines.append(f"{indent}{' ' * (max_len + len(indent) + padding)} {line}") 230 | 231 | lines.append("\nOther CLI Commands:") 232 | lines.append(f"{indent}{'select model [name]'.ljust(max_len + padding)} - Switch the Ollama LLM model.") 233 | lines.append(f"{indent}{'refresh_models'.ljust(max_len + padding)} - Reload list of available Ollama models.") 234 | # lines.append(f"{indent}{'stop'.ljust(max_len + padding)} - Stops active text-to-speech feedback (use Ctrl+C).") # Optional: Keep or remove stop command 235 | lines.append(f"{indent}{'help'.ljust(max_len + padding)} - Shows this help message.") 236 | lines.append(f"{indent}{'exit / quit'.ljust(max_len + padding)} - Exits the application.") 237 | 238 | lines.append("\nUsage:") 239 | # --- Updated Usage Section --- 240 | lines.append(f"{indent}- Start input with a known command keyword (e.g., 'click OK', 'read', 'screengrab')") 241 | lines.append(f"{indent} to execute that specific command.") 242 | lines.append(f"{indent}- To query the LLM, you MUST start with the 'computer' keyword") 243 | lines.append(f"{indent} (e.g., 'computer what is the weather in Sutherlin, Oregon?').") # Added location 244 | lines.append(f"{indent}- Any input (voice or typed) that DOES NOT start with a known command keyword") 245 | lines.append(f"{indent} will be automatically TYPED out, similar to the 'type' command.") 246 | lines.append(f"{indent} Example: Saying 'hello world' will result in 'hello world' being typed.") 247 | lines.append(f"\n{indent}Hotkeys:") 248 | lines.append(f"{indent}- Voice Activation: Press and hold Ctrl+Alt to record voice input.") 249 | lines.append(f"{indent}- Interruption: Press Ctrl+C to stop the current command or speech output.") 250 | # --- End Updated Usage Section --- 251 | return "\n".join(lines) 252 | 253 | # --- Dynamic Prompt Function --- 254 | def get_dynamic_prompt() -> str: 255 | """Returns the prompt string including the current model.""" 256 | model = computer_command_instance.llm_model if computer_command_instance else "???" 257 | # Changed prompt slightly to indicate default action 258 | return f"Cmd/Type ({model})> " 259 | 260 | # --- Main Application Logic --- 261 | async def async_main(): 262 | """Main asynchronous function for the CLI.""" 263 | global ollama_models_list, computer_command_instance, current_command_task 264 | 265 | main_event_loop = asyncio.get_running_loop() 266 | print_task = asyncio.create_task(print_consumer()) 267 | # Allow print consumer to start up 268 | await asyncio.sleep(0.05) 269 | 270 | schedule_print("System", "Initializing Voice Command CLI...") 271 | 272 | # Initialize Command Processor 273 | try: 274 | command_processor = CommandProcessor() 275 | computer_command_instance = command_processor.commands.get("computer") 276 | except Exception as e: 277 | logger.critical(f"Failed to initialize CommandProcessor: {e}", exc_info=True) 278 | schedule_print("Error", f"CRITICAL: Failed to load commands: {e}") 279 | # Attempt cleanup and exit 280 | await print_queue.put((None, None)) 281 | try: await asyncio.wait_for(print_task, timeout=1.0) 282 | except Exception: pass 283 | sys.exit(1) 284 | 285 | 286 | # --- Initialize Ollama Models --- 287 | if not isinstance(computer_command_instance, ComputerCommand): 288 | schedule_print("Warning", "ComputerCommand module not loaded correctly. LLM features disabled.") 289 | else: 290 | schedule_print("System", "Fetching Ollama models...") 291 | try: 292 | fetched_models = await computer_command_instance.get_available_models() 293 | if fetched_models: # Successfully fetched list (might be empty) 294 | ollama_models_list = fetched_models 295 | ollama_models_for_completion[:] = ollama_models_list 296 | if ollama_models_list: # List is not empty 297 | # Check if current model (default 'mistral') is valid, else use first available 298 | current_model = computer_command_instance.llm_model # Get current default 299 | if current_model not in ollama_models_list: 300 | new_model = ollama_models_list[0] 301 | computer_command_instance.set_llm_model(new_model) 302 | schedule_print("System", f"Default model '{current_model}' not found. Switched to: {new_model}") 303 | else: 304 | schedule_print("System", f"Ollama models loaded. Using: {current_model}") 305 | 306 | else: # List is empty 307 | schedule_print("Warning", "No Ollama models found. LLM commands may fail.") 308 | computer_command_instance.set_llm_model("mistral") # Keep fallback 309 | else: # API call failed (returned None) 310 | schedule_print("Warning", f"Could not fetch Ollama models (API error?). Using default: {ollama_models_list[0]}") 311 | ollama_models_for_completion[:] = ollama_models_list # Update completer with default 312 | computer_command_instance.set_llm_model(ollama_models_list[0]) # Ensure default is set 313 | except Exception as e: 314 | schedule_print("Error", f"Failed during Ollama model fetch: {e}. Using default.") 315 | ollama_models_for_completion[:] = ollama_models_list # Update completer with default 316 | if computer_command_instance: computer_command_instance.set_llm_model(ollama_models_list[0]) 317 | 318 | 319 | # --- Initialize Voice System & Hotkey --- 320 | voice_system = None # Define voice_system before try block 321 | listener_thread = None # Define listener_thread before try block 322 | 323 | # --- Define the callback function that VoiceCommandSystem will call --- 324 | async def trigger_command_processing(transcribed_text: str): 325 | """Callback from Voice System to handle final transcription.""" 326 | # This function runs in the main async context 327 | await handle_voice_command(transcribed_text, command_processor) 328 | 329 | try: 330 | # Pass the new trigger function during initialization 331 | voice_system = VoiceCommandSystem( 332 | loop=main_event_loop, 333 | speak_func=speak, 334 | command_trigger_func=trigger_command_processing # Pass the async callback 335 | ) 336 | # Set the callback for printing transcripts/status 337 | voice_system.set_transcript_callback(handle_transcript) 338 | schedule_print("System", "Voice system initialized.") 339 | 340 | # Pass the task accessor function to the listener 341 | listener_thread = hotkey_listener.start_listener( 342 | main_event_loop, 343 | voice_system, 344 | schedule_print, 345 | get_current_task # Pass the accessor function 346 | ) 347 | if not listener_thread: 348 | raise RuntimeError("Hotkey listener failed to start.") 349 | 350 | except Exception as e: 351 | logger.error(f"Failed to initialize VoiceCommandSystem or Hotkey Listener: {e}", exc_info=True) 352 | schedule_print("Error", "Failed to initialize voice system or hotkey. Voice commands/hotkeys disabled.") 353 | if voice_system: # Attempt cleanup if voice system partially initialized 354 | try: 355 | # Check if cleanup is async or sync 356 | if asyncio.iscoroutinefunction(voice_system.cleanup): 357 | await voice_system.cleanup() 358 | else: 359 | # Run synchronous cleanup in executor if needed, or directly if safe 360 | await main_event_loop.run_in_executor(None, voice_system.cleanup) 361 | except Exception as cleanup_e: logger.error(f"Error during voice system cleanup after init failure: {cleanup_e}") 362 | voice_system = None # Ensure voice_system is None if init fails 363 | 364 | 365 | # --- Setup Prompt Session --- 366 | cli_completer = CLICompleter(command_processor) 367 | session = PromptSession( 368 | get_dynamic_prompt, # Dynamic prompt function 369 | completer=cli_completer, 370 | complete_while_typing=True, 371 | # history=FileHistory('cli_history.txt'), # Optional: Uncomment for history 372 | # auto_suggest=AutoSuggestFromHistory(), # Optional: Uncomment for suggestions 373 | ) 374 | 375 | schedule_print("System", f"CLI Ready. Type 'help' for commands or use hotkeys.") 376 | 377 | # --- Main Input Loop --- 378 | while True: 379 | input_text = "" # Ensure defined in outer scope 380 | try: 381 | # Use patch_stdout to ensure prompt redraws correctly after async prints 382 | with patch_stdout(): 383 | input_text = await session.prompt_async() # Use await 384 | 385 | input_text = input_text.strip() 386 | if not input_text: continue # Ignore empty input 387 | 388 | # --- Handle Special CLI Commands --- 389 | if input_text.lower() in ["exit", "quit"]: 390 | schedule_print("System", "Exiting...") 391 | break # Exit the main loop 392 | 393 | elif input_text.lower() == "help": 394 | help_content = generate_help_text(command_processor) 395 | # Use safe_print directly for potentially long help text with formatting 396 | await safe_print(help_content) 397 | continue 398 | 399 | elif input_text.lower() == "refresh_models": 400 | if computer_command_instance: 401 | schedule_print("System", "Refreshing Ollama models...") 402 | try: 403 | fetched_models = await computer_command_instance.get_available_models() 404 | if fetched_models is not None: # Check for None explicitly 405 | ollama_models_list = fetched_models 406 | ollama_models_for_completion[:] = ollama_models_list 407 | if not ollama_models_list: # List could be empty 408 | schedule_print("Warning", "No Ollama models found after refresh.") 409 | if computer_command_instance.llm_model not in ollama_models_list: 410 | computer_command_instance.set_llm_model("mistral") # Fallback 411 | schedule_print("System", "Model reset to fallback 'mistral'.") 412 | elif computer_command_instance.llm_model not in ollama_models_list: 413 | new_model = ollama_models_list[0] # Use first available 414 | computer_command_instance.set_llm_model(new_model) 415 | schedule_print("System", f"Models refreshed: {ollama_models_list}. Current model reset to {new_model}") 416 | else: 417 | schedule_print("System", f"Models refreshed: {ollama_models_list}") 418 | else: # get_available_models returned None (error occurred) 419 | schedule_print("Error", "Failed to fetch models (API error or connection issue).") 420 | except Exception as e: schedule_print("Error", f"Failed to refresh models: {e}") 421 | else: schedule_print("Error", "Computer command module unavailable.") 422 | continue 423 | 424 | elif input_text.lower().startswith("select model "): 425 | parts = input_text.split(maxsplit=2) 426 | if len(parts) == 3: 427 | model_name = parts[2] 428 | # Check against the potentially empty list 429 | if ollama_models_list and model_name in ollama_models_list: 430 | if computer_command_instance: 431 | computer_command_instance.set_llm_model(model_name) 432 | schedule_print("System", f"LLM model set to: {model_name}") 433 | else: schedule_print("Error", "Computer command module unavailable.") 434 | elif not ollama_models_list: 435 | schedule_print("Error", "No models available to select.") 436 | else: 437 | # Provide available models in error message 438 | available_models_str = ', '.join(ollama_models_list) 439 | schedule_print("Error", f"Model '{model_name}' not found. Available: {available_models_str}") 440 | else: schedule_print("Error", "Usage: select model ") 441 | continue 442 | 443 | # --- Process Regular Commands / Default Typing --- 444 | await process_typed_command(input_text, command_processor) 445 | 446 | except KeyboardInterrupt: 447 | # Handle Ctrl+C pressed *at the prompt* 448 | if current_command_task and not current_command_task.done(): 449 | logger.debug("Ctrl+C at prompt: Cancelling active task.") 450 | current_command_task.cancel() 451 | # Interrupt handler will print the message 452 | else: 453 | # Schedule print message for Ctrl+C when idle if needed 454 | # schedule_print("System", "(Ctrl+C at prompt)") # Can be noisy 455 | pass # Just redraw prompt by continuing 456 | # Continue to redraw prompt 457 | continue 458 | except EOFError: 459 | # Exit gracefully on Ctrl+D 460 | schedule_print("System", "EOF received. Exiting...") 461 | break 462 | except Exception as e: 463 | logger.error(f"Error in main loop processing input '{input_text}': {e}", exc_info=True) 464 | schedule_print("Error", f"An unexpected error occurred: {e}") 465 | # Prevent rapid error loops if prompt_async fails repeatedly 466 | await asyncio.sleep(0.1) 467 | 468 | # --- Application Cleanup --- 469 | schedule_print("System", "Shutting down...") 470 | 471 | # --- Cancel any lingering task --- 472 | if current_command_task and not current_command_task.done(): 473 | logger.info("Shutting down: Cancelling active command task.") 474 | current_command_task.cancel() 475 | try: 476 | await asyncio.wait_for(current_command_task, timeout=1.0) # Wait briefly 477 | except asyncio.CancelledError: pass # Expected 478 | except asyncio.TimeoutError: logger.warning("Timeout waiting for final task cancellation.") 479 | except Exception as e: logger.error(f"Error awaiting final task cancellation: {e}") 480 | 481 | 482 | # --- Voice System Cleanup --- 483 | if voice_system and hasattr(voice_system, 'cleanup'): 484 | logger.info("Calling voice system cleanup...") 485 | try: 486 | # Check if cleanup is async or sync 487 | if asyncio.iscoroutinefunction(voice_system.cleanup): 488 | await voice_system.cleanup() 489 | else: 490 | # Run synchronous cleanup in executor if needed, or directly if safe 491 | await main_event_loop.run_in_executor(None, voice_system.cleanup) 492 | logger.info("Voice system cleaned up.") 493 | except Exception as e: 494 | logger.error(f"Error during voice system cleanup: {e}", exc_info=True) 495 | 496 | 497 | # --- Print Consumer Cleanup --- 498 | logger.info("Stopping print consumer...") 499 | try: 500 | await print_queue.put((None, None)) # Send sentinel 501 | # Wait briefly for the consumer to process the sentinel 502 | await asyncio.wait_for(print_task, timeout=2.0) 503 | logger.info("Print consumer stopped.") 504 | except asyncio.TimeoutError: 505 | logger.warning("Print consumer task did not finish promptly. Cancelling.") 506 | print_task.cancel() 507 | try: await print_task # Allow cancellation to be processed 508 | except asyncio.CancelledError: pass # Expected 509 | except Exception as e: 510 | logger.error(f"Error stopping print consumer: {e}") 511 | 512 | # Listener thread is daemon, will exit when main thread exits. 513 | 514 | # --- Main execution block --- 515 | if __name__ == "__main__": 516 | try: 517 | # Ensure terminal is reset properly on exit, especially if errors occur 518 | import os 519 | original_stty = None 520 | if sys.stdin.isatty(): # Check if running in a real terminal 521 | try: 522 | # Use os.read rather than os.popen for potentially better compatibility/security 523 | # We need a way to run 'stty -g' and read its output. subprocess is better. 524 | stty_process = subprocess.run(['stty', '-g'], capture_output=True, text=True, check=False) 525 | if stty_process.returncode == 0: 526 | original_stty = stty_process.stdout.strip() 527 | else: 528 | logger.debug(f"stty -g failed: {stty_process.stderr}") 529 | except FileNotFoundError: 530 | logger.debug("'stty' command not found, cannot save terminal settings.") 531 | except Exception as e: # Catch other potential errors 532 | logger.warning(f"Could not get terminal settings via stty: {e}") 533 | original_stty = None 534 | 535 | try: 536 | asyncio.run(async_main()) 537 | finally: 538 | # Restore terminal settings if they were saved 539 | if original_stty and sys.stdin.isatty(): # Check again if it's a tty 540 | logger.debug(f"Restoring stty settings: {original_stty}") 541 | try: 542 | # Use subprocess again for consistency 543 | restore_process = subprocess.run(['stty', original_stty], check=False) 544 | if restore_process.returncode != 0: 545 | logger.warning(f"Failed to restore stty settings: {restore_process.stderr}") 546 | except FileNotFoundError: 547 | logger.warning("Cannot restore terminal settings: 'stty' not found.") 548 | except Exception as e: 549 | logger.error(f"Error restoring stty settings: {e}") 550 | 551 | except KeyboardInterrupt: 552 | # This catches Ctrl+C if it happens *before* the asyncio loop starts or *after* it exits 553 | logger.info("Application interrupted by user (Ctrl+C outside main loop).") 554 | except Exception as e: 555 | # Log critical errors that occur outside the main async loop 556 | logging.critical(f"Application failed to run: {e}", exc_info=True) 557 | # Ensure error is printed to stderr if logging isn't fully set up 558 | print(f"\n[CRITICAL ERROR] Application failed: {e}", file=sys.stderr) 559 | sys.exit(1) # Exit with error code 560 | finally: 561 | # Ensure this message always prints on exit 562 | print("\nVoice Command CLI exited.") 563 | # Ensure a clean exit code, especially after KeyboardInterrupt handled gracefully 564 | sys.exit(0) 565 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | --------------------------------------------------------------------------------