├── .env.example ├── agent_cli ├── py.typed ├── scripts ├── core │ ├── __init__.py │ ├── deps.py │ ├── watch.py │ ├── transcription_logger.py │ ├── sse.py │ ├── chroma.py │ ├── audio_format.py │ └── reranker.py ├── __main__.py ├── install │ ├── __init__.py │ ├── hotkeys.py │ ├── services.py │ └── common.py ├── __init__.py ├── agents │ ├── __init__.py │ ├── memory │ │ └── __init__.py │ ├── server.py │ └── _voice_agent_common.py ├── rag │ ├── __init__.py │ ├── _prompt.py │ ├── models.py │ ├── _store.py │ └── _indexer.py ├── constants.py ├── memory │ ├── __init__.py │ ├── entities.py │ ├── _streaming.py │ ├── _tasks.py │ ├── _filters.py │ ├── models.py │ ├── api.py │ └── _prompt.py ├── services │ ├── _wyoming_utils.py │ └── __init__.py └── cli.py ├── scripts ├── .runtime │ └── .gitkeep ├── __init__.py ├── macos-hotkeys │ ├── skhd-config-example │ ├── toggle-autocorrect.sh │ ├── toggle-transcription.sh │ ├── README.md │ └── toggle-voice-edit.sh ├── run-whisper-macos.sh ├── run-whisper.sh ├── run-openwakeword.sh ├── nvidia-asr-server │ ├── pyproject.toml │ ├── shell.nix │ └── README.md ├── run-piper.sh ├── run-whisper-linux.sh ├── linux-hotkeys │ ├── toggle-autocorrect.sh │ ├── toggle-voice-edit.sh │ ├── toggle-transcription.sh │ └── README.md ├── zellij_help.txt ├── setup-macos.sh ├── setup-macos-hotkeys.sh ├── setup-linux-hotkeys.sh ├── start-all-services.sh └── setup-linux.sh ├── tests ├── memory │ ├── __init__.py │ ├── test_api_health.py │ ├── test_files.py │ ├── test_indexer.py │ ├── test_utils.py │ └── test_proxy_passthrough.py ├── core │ ├── __init__.py │ ├── test_chroma.py │ ├── test_watch.py │ ├── test_sse.py │ └── test_audio_format.py ├── rag │ ├── __init__.py │ ├── test_store.py │ ├── test_retriever.py │ ├── test_history.py │ ├── test_rag_proxy_passthrough.py │ └── test_indexer.py ├── __init__.py ├── agents │ ├── __init__.py │ ├── test_tts_common_extra.py │ ├── test_wake_word_assistant.py │ ├── test_speak_e2e.py │ ├── test_transcribe_agent.py │ ├── 
test_voice_edit.py │ └── test_transcribe_e2e.py ├── mocks │ ├── __init__.py │ ├── llm.py │ ├── audio.py │ └── wyoming.py ├── test_env_vars.py ├── test_llm_gemini.py ├── test_tools.py ├── test_cli.py ├── test_wyoming_utils.py ├── conftest.py └── test_asr.py ├── example.agent-cli-config.toml ├── .github ├── release-drafter.yml ├── workflows │ ├── toc.yaml │ ├── release-drafter.yml │ ├── release.yml │ ├── automerge.yml │ ├── pytest.yml │ └── update-readme.yml └── renovate.json ├── docker ├── Dockerfile └── docker-compose.yml ├── shell.nix ├── .pre-commit-config.yaml ├── .gitignore ├── LICENSE ├── .cursorrules ├── docs └── installation │ ├── README.md │ ├── docker.md │ ├── windows.md │ └── macos.md └── pyproject.toml /.env.example: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_cli/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_cli/scripts: -------------------------------------------------------------------------------- 1 | ../scripts -------------------------------------------------------------------------------- /scripts/.runtime/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory tests.""" 2 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core helper tests.""" 2 | -------------------------------------------------------------------------------- /tests/rag/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Tests for RAG module.""" 2 | -------------------------------------------------------------------------------- /example.agent-cli-config.toml: -------------------------------------------------------------------------------- 1 | agent_cli/example-config.toml -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the agent-cli package.""" 2 | -------------------------------------------------------------------------------- /tests/agents/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the agents package.""" 2 | -------------------------------------------------------------------------------- /agent_cli/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core functionalities for the agent CLI.""" 2 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What’s Changed 3 | 4 | $CHANGES 5 | -------------------------------------------------------------------------------- /tests/mocks/__init__.py: -------------------------------------------------------------------------------- 1 | """Test mocks package.""" 2 | 3 | from __future__ import annotations 4 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Scripts package containing installation and service management scripts.""" 2 | -------------------------------------------------------------------------------- /docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM ollama/ollama 2 | 3 | COPY examples/run_ollama.sh /usr/local/bin/run_ollama.sh 4 | 5 | ENTRYPOINT ["run_ollama.sh"] 6 | -------------------------------------------------------------------------------- /agent_cli/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point for the Agent CLI.""" 2 | 3 | from agent_cli.cli import app 4 | 5 | if __name__ == "__main__": 6 | app() 7 | -------------------------------------------------------------------------------- /agent_cli/install/__init__.py: -------------------------------------------------------------------------------- 1 | """Installation commands for agent-cli.""" 2 | 3 | from __future__ import annotations 4 | 5 | __all__ = ["hotkeys", "services"] 6 | -------------------------------------------------------------------------------- /agent_cli/__init__.py: -------------------------------------------------------------------------------- 1 | """A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.""" 2 | 3 | from importlib.metadata import version 4 | 5 | __version__ = version("agent-cli") 6 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/skhd-config-example: -------------------------------------------------------------------------------- 1 | # Simple skhd configuration for transcription toggle 2 | # Save to ~/.config/skhd/skhdrc 3 | 4 | # Press Cmd+Shift+R to toggle transcription 5 | cmd + shift - r : /path/to/toggle-transcription.sh 6 | -------------------------------------------------------------------------------- /scripts/run-whisper-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # macOS: MLX-based Whisper for Apple Silicon 3 | echo "🎤 Starting Wyoming MLX Whisper on port 10300..." 
4 | echo "🍎 Using MLX for Apple Silicon optimization" 5 | 6 | uvx --python 3.12 wyoming-mlx-whisper 7 | -------------------------------------------------------------------------------- /.github/workflows/toc.yaml: -------------------------------------------------------------------------------- 1 | on: push 2 | name: TOC Generator 3 | jobs: 4 | generateTOC: 5 | name: TOC Generator 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: technote-space/toc-generator@v4 9 | with: 10 | TOC_TITLE: "" 11 | -------------------------------------------------------------------------------- /scripts/run-whisper.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Wrapper that calls the platform-specific whisper script 3 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 4 | 5 | if [ "$(uname -s)" = "Darwin" ]; then 6 | exec "$SCRIPT_DIR/run-whisper-macos.sh" 7 | else 8 | exec "$SCRIPT_DIR/run-whisper-linux.sh" 9 | fi 10 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | update_release_draft: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: release-drafter/release-drafter@v6 13 | env: 14 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 15 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | # nix-direnv file 2 | { pkgs ? 
import {}}: 3 | 4 | pkgs.mkShell { 5 | packages = [ 6 | pkgs.portaudio 7 | pkgs.ffmpeg 8 | pkgs.pkg-config 9 | pkgs.gcc 10 | pkgs.python3 11 | ]; 12 | 13 | shellHook = '' 14 | export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath [ pkgs.portaudio ]}:$LD_LIBRARY_PATH 15 | ''; 16 | } 17 | -------------------------------------------------------------------------------- /agent_cli/agents/__init__.py: -------------------------------------------------------------------------------- 1 | """Agent implementations for the Agent CLI.""" 2 | 3 | from . import ( 4 | assistant, 5 | autocorrect, 6 | chat, 7 | memory, 8 | rag_proxy, 9 | server, 10 | speak, 11 | transcribe, 12 | voice_edit, 13 | ) 14 | 15 | __all__ = [ 16 | "assistant", 17 | "autocorrect", 18 | "chat", 19 | "memory", 20 | "rag_proxy", 21 | "server", 22 | "speak", 23 | "transcribe", 24 | "voice_edit", 25 | ] 26 | -------------------------------------------------------------------------------- /scripts/run-openwakeword.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "👂 Starting Wyoming OpenWakeWord on port 10400..." 3 | 4 | # Use the LiteRT fork until the PR is merged 5 | # PR: https://github.com/rhasspy/wyoming-openwakeword/pull/XXX 6 | # This version works on macOS and other platforms without tflite-runtime 7 | 8 | uvx --python 3.12 --from git+https://github.com/basnijholt/wyoming-openwakeword.git@litert \ 9 | wyoming-openwakeword \ 10 | --uri 'tcp://0.0.0.0:10400' \ 11 | --preload-model 'ok_nabu' 12 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-autocorrect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli autocorrect on macOS 4 | 5 | /opt/homebrew/bin/terminal-notifier -title "📝 Autocorrect" -message "Processing clipboard text..." 
6 | 7 | OUTPUT=$("$HOME/.local/bin/agent-cli" autocorrect --quiet 2>/dev/null) 8 | if [ -n "$OUTPUT" ]; then 9 | /opt/homebrew/bin/terminal-notifier -title "✅ Corrected" -message "$OUTPUT" 10 | else 11 | /opt/homebrew/bin/terminal-notifier -title "❌ Error" -message "No text to correct" 12 | fi 13 | -------------------------------------------------------------------------------- /agent_cli/agents/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory system CLI commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | import typer 6 | 7 | from agent_cli.cli import app 8 | 9 | memory_app = typer.Typer( 10 | name="memory", 11 | help="Memory system operations (add, proxy, etc.).", 12 | rich_markup_mode="markdown", 13 | no_args_is_help=True, 14 | ) 15 | 16 | app.add_typer(memory_app, name="memory") 17 | 18 | # Import subcommands to register them with memory_app 19 | from agent_cli.agents.memory import add, proxy # noqa: E402 20 | 21 | __all__ = ["add", "memory_app", "proxy"] 22 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | environment: 11 | name: pypi 12 | url: https://pypi.org/p/${{ github.repository }} 13 | permissions: 14 | id-token: write 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Install uv 18 | uses: astral-sh/setup-uv@v6 19 | - name: Build 20 | run: uv build 21 | - name: Publish package distributions to PyPI 22 | uses: pypa/gh-action-pypi-publish@release/v1 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: check-added-large-files 6 | exclude: uv\.lock 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: mixed-line-ending 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | rev: "v0.14.9" 12 | hooks: 13 | - id: ruff 14 | args: ["--fix"] 15 | - id: ruff-format 16 | - repo: https://github.com/pre-commit/mirrors-mypy 17 | rev: "v1.19.1" 18 | hooks: 19 | - id: mypy 20 | additional_dependencies: ["types-PyYAML"] 21 | -------------------------------------------------------------------------------- /agent_cli/rag/__init__.py: -------------------------------------------------------------------------------- 1 | """RAG module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from agent_cli.core.deps import ensure_optional_dependencies 6 | 7 | _REQUIRED_DEPS = { 8 | "chromadb": "chromadb", 9 | "watchfiles": "watchfiles", 10 | "markitdown": "markitdown", 11 | "fastapi": "fastapi", 12 | "uvicorn": "uvicorn", 13 | "onnxruntime": "onnxruntime", 14 | "huggingface_hub": "huggingface-hub", 15 | "transformers": "transformers", 16 | } 17 | 18 | ensure_optional_dependencies( 19 | _REQUIRED_DEPS, 20 | extra_name="rag", 21 | install_hint="`pip install agent-cli[rag]` or `uv sync --extra rag`", 22 | ) 23 | -------------------------------------------------------------------------------- /agent_cli/constants.py: -------------------------------------------------------------------------------- 1 | """Default configuration settings for the Agent CLI package.""" 2 | 3 | from __future__ import annotations 4 | 5 | # --- Audio Configuration --- 6 | AUDIO_FORMAT_STR = "int16" # sounddevice/numpy format 7 | AUDIO_FORMAT_WIDTH = 2 # 2 bytes (16-bit) 8 | AUDIO_CHANNELS = 1 9 | AUDIO_RATE = 16000 10 | AUDIO_CHUNK_SIZE = 1024 11 | 12 | # Standard Wyoming audio configuration 13 | WYOMING_AUDIO_CONFIG = { 14 | "rate": AUDIO_RATE, 15 | "width": AUDIO_FORMAT_WIDTH, 16 | "channels": AUDIO_CHANNELS, 
17 | } 18 | 19 | # --- HTTP Defaults --- 20 | DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" 21 | DEFAULT_OPENAI_MODEL = "gpt-5-mini" 22 | DEFAULT_OPENAI_EMBEDDING_MODEL = "text-embedding-3-small" 23 | -------------------------------------------------------------------------------- /agent_cli/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory module for long-term chat history.""" 2 | 3 | from __future__ import annotations 4 | 5 | from agent_cli.core.deps import ensure_optional_dependencies 6 | 7 | _REQUIRED_DEPS = { 8 | "chromadb": "chromadb", 9 | "fastapi": "fastapi", 10 | "uvicorn": "uvicorn", 11 | "onnxruntime": "onnxruntime", 12 | "huggingface_hub": "huggingface-hub", 13 | "transformers": "transformers", 14 | } 15 | 16 | ensure_optional_dependencies( 17 | _REQUIRED_DEPS, 18 | extra_name="memory", 19 | install_hint="`pip install agent-cli[memory]` or `uv sync --extra memory`", 20 | ) 21 | 22 | from agent_cli.memory.client import MemoryClient # noqa: E402 23 | 24 | __all__ = ["MemoryClient"] 25 | -------------------------------------------------------------------------------- /scripts/nvidia-asr-server/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "nvidia-asr-server" 3 | version = "1.0.0" 4 | description = "NVIDIA ASR server with OpenAI-compatible API" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "fastapi[standard]>=0.115.0", 9 | "torch>=2.5.0", 10 | "soundfile>=0.12.1", 11 | "sacrebleu>=2.4.0", 12 | "typer>=0.9.0", 13 | "nemo-toolkit[asr,tts] @ git+https://github.com/NVIDIA/NeMo.git", 14 | ] 15 | 16 | [tool.uv.sources] 17 | torch = [{ index = "pytorch-cu124" }] 18 | 19 | [[tool.uv.index]] 20 | name = "pytorch-cu124" 21 | url = "https://download.pytorch.org/whl/cu124" 22 | explicit = true 23 | 24 | [tool.uv] 25 | override-dependencies = [ 26 | "ml-dtypes>=0.5.0", 27 | ] 28 | 
-------------------------------------------------------------------------------- /agent_cli/core/deps.py: -------------------------------------------------------------------------------- 1 | """Helpers for optional dependency checks.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.util import find_spec 6 | 7 | 8 | def ensure_optional_dependencies( 9 | required: dict[str, str], 10 | *, 11 | extra_name: str, 12 | install_hint: str | None = None, 13 | ) -> None: 14 | """Ensure optional dependencies are present, otherwise raise ImportError.""" 15 | missing = [ 16 | pkg_name for module_name, pkg_name in required.items() if find_spec(module_name) is None 17 | ] 18 | if not missing: 19 | return 20 | 21 | hint = install_hint or f"`pip install agent-cli[{extra_name}]`" 22 | msg = f"Missing required dependencies for {extra_name}: {', '.join(missing)}. Please install with {hint}." 23 | raise ImportError(msg) 24 | -------------------------------------------------------------------------------- /.github/workflows/automerge.yml: -------------------------------------------------------------------------------- 1 | name: Auto Merge 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, labeled, unlabeled] 6 | 7 | jobs: 8 | auto-merge: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Label PR 12 | if: github.event.pull_request.title == '[pre-commit.ci] pre-commit autoupdate' 13 | run: | 14 | curl -X POST \ 15 | -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ 16 | -H "Accept: application/vnd.github.v3+json" \ 17 | https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels \ 18 | -d '{"labels":["automerge"]}' 19 | - name: Automatic Merge 20 | uses: pascalgn/automerge-action@v0.16.4 21 | env: 22 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 23 | -------------------------------------------------------------------------------- /.github/renovate.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "rebaseWhen": "behind-base-branch", 4 | "dependencyDashboard": true, 5 | "labels": [ 6 | "dependencies", 7 | "no-stale" 8 | ], 9 | "commitMessagePrefix": "⬆️", 10 | "commitMessageTopic": "{{depName}}", 11 | "prBodyDefinitions": { 12 | "Release": "yes" 13 | }, 14 | "packageRules": [ 15 | { 16 | "matchManagers": [ 17 | "github-actions" 18 | ], 19 | "addLabels": [ 20 | "github_actions" 21 | ], 22 | "rangeStrategy": "pin" 23 | }, 24 | { 25 | "matchManagers": [ 26 | "github-actions" 27 | ], 28 | "matchUpdateTypes": [ 29 | "minor", 30 | "patch" 31 | ], 32 | "automerge": true 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /scripts/run-piper.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "🔊 Starting Wyoming Piper on port 10200..." 3 | 4 | # Create .runtime directory for local assets 5 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 6 | mkdir -p "$SCRIPT_DIR/.runtime" 7 | 8 | # Download voice if not present using uvx 9 | if [ ! -d "$SCRIPT_DIR/.runtime/piper-data/en_US-lessac-medium" ]; then 10 | echo "⬇️ Downloading voice model..." 
11 | mkdir -p "$SCRIPT_DIR/.runtime/piper-data" 12 | cd "$SCRIPT_DIR/.runtime/piper-data" 13 | uvx --python 3.12 --from piper-tts python -m piper.download_voices en_US-lessac-medium 14 | cd "$SCRIPT_DIR" 15 | fi 16 | 17 | # Run Wyoming Piper using uvx wrapper 18 | uvx --python 3.12 \ 19 | --from git+https://github.com/rhasspy/wyoming-piper.git@v2.1.1 \ 20 | wyoming-piper \ 21 | --voice en_US-lessac-medium \ 22 | --uri 'tcp://0.0.0.0:10200' \ 23 | --data-dir "$SCRIPT_DIR/.runtime/piper-data" \ 24 | --download-dir "$SCRIPT_DIR/.runtime/piper-data" 25 | -------------------------------------------------------------------------------- /agent_cli/rag/_prompt.py: -------------------------------------------------------------------------------- 1 | """Centralized prompts for RAG LLM calls.""" 2 | 3 | RAG_PROMPT_WITH_TOOLS = """ 4 | ## Retrieved Documentation 5 | The following was automatically retrieved based on the user's query: 6 | 7 | 8 | {context} 9 | 10 | 11 | ## RAG Instructions 12 | - Use the retrieved context ONLY if it's relevant to the question 13 | - If the context is irrelevant, ignore it and answer based on your knowledge 14 | - When using context, cite sources: [Source: filename] 15 | - If snippets are insufficient, call read_full_document(file_path) to get full content 16 | """.strip() 17 | 18 | RAG_PROMPT_NO_TOOLS = """ 19 | ## Retrieved Documentation 20 | The following was automatically retrieved based on the user's query: 21 | 22 | 23 | {context} 24 | 25 | 26 | ## RAG Instructions 27 | - Use the retrieved context ONLY if it's relevant to the question 28 | - If the context is irrelevant, ignore it and answer based on your knowledge 29 | - When using context, cite sources: [Source: filename] 30 | """.strip() 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | 4 | # Python 5 | __pycache__/ 6 | 
*.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Audio files 27 | *.wav 28 | *.mp3 29 | 30 | # Virtual Environment 31 | venv/ 32 | env/ 33 | .direnv/ 34 | ENV/ 35 | 36 | # IDE 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | 42 | # Worktrees directory 43 | worktrees/ 44 | 45 | # Submodule 46 | cli-submodule/ 47 | 48 | # other 49 | .coverage* 50 | coverage.* 51 | .envrc 52 | .ruff_cache/ 53 | .mypy_cache/ 54 | .pytest_cache/ 55 | .vscode/ 56 | .vscode/ 57 | 58 | # Examples and scripts - exclude downloaded models and data 59 | examples/ollama/models/ 60 | examples/piper-data/ 61 | examples/whisper-data/ 62 | examples/ollama/id_ed25519* 63 | scripts/.runtime/ 64 | *.onnx 65 | *.onnx.json 66 | *.bin 67 | 68 | # RAG/memory 69 | rag_proxy_*.log 70 | transcription_log.json 71 | temp_rag_*/ 72 | memory_db 73 | rag_db/ 74 | *.log 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Bas Nijholt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/nvidia-asr-server/shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? import { config.allowUnfree = true; } }: 2 | 3 | pkgs.mkShell { 4 | buildInputs = with pkgs; [ 5 | # Python and uv 6 | python313 7 | uv 8 | 9 | # Audio libraries 10 | ffmpeg 11 | ]; 12 | 13 | shellHook = '' 14 | # Set up CUDA environment (use system NVIDIA drivers and CUDA libraries) 15 | export LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/current-system/sw/lib:$LD_LIBRARY_PATH 16 | 17 | # Tell triton where to find libcuda.so (avoids calling /sbin/ldconfig) 18 | export TRITON_LIBCUDA_PATH=/run/opengl-driver/lib 19 | 20 | # PyTorch memory management - avoid fragmentation 21 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True 22 | 23 | # Canary server defaults 24 | export CANARY_PORT=9898 25 | # CANARY_DEVICE auto-detects GPU with most free memory (override if needed) 26 | 27 | echo "CUDA environment configured (using system NVIDIA drivers)" 28 | echo "TRITON_LIBCUDA_PATH: $TRITON_LIBCUDA_PATH" 29 | echo "PYTORCH_CUDA_ALLOC_CONF: $PYTORCH_CUDA_ALLOC_CONF" 30 | echo "Run 'uv run server.py' to start the server" 31 | ''; 32 | } 33 | -------------------------------------------------------------------------------- /.cursorrules: -------------------------------------------------------------------------------- 1 | # Development Rules 2 | 3 | ## Core Rules 4 | - 
Always start by exploring the context of this package: list the existing files and read the full `README.md`. 5 | - Always use `uv sync --all-extras` to install packages then activate the virtual environment with `source .venv/bin/activate` 6 | - Commit frequently but always make sure tests pass first, using `pytest` 7 | - When working on a feature, check out `git diff origin/main | cat`. Make sure to use --no-pager, or pipe the output to `cat`. 8 | - Prefer functional style Python instead of classes with inheritance 9 | - Keep it DRY - reuse code as much as possible 10 | - Always run pre-commit hooks before committing 11 | - Implement the simplest solution possible and don't generalize when not yet needed 12 | - Only implement the feature that is asked for, not anything extra 13 | - NEVER run `git add .` - only use `git add filename` to avoid committing unrelated files 14 | - NEVER claim that you are done with a task without running pytest. 15 | - The linter might flag issues in pyproject.toml but ignore these because it is incorrect! 16 | - DO NOT MANUALLY update the CLI help messages in the `README.md`. These are automatically generated. 
17 | -------------------------------------------------------------------------------- /tests/core/test_chroma.py: -------------------------------------------------------------------------------- 1 | """Tests for core Chroma helpers.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import BaseModel 8 | 9 | from agent_cli.core import chroma 10 | 11 | 12 | class _Meta(BaseModel): 13 | source: str 14 | tags: list[str] 15 | score: float | None = None 16 | 17 | 18 | class _FakeCollection: 19 | def __init__(self) -> None: 20 | self.calls: list[tuple[list[str], list[str], list[dict[str, Any]]]] = [] 21 | 22 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 23 | self.calls.append((ids, documents, metadatas)) 24 | 25 | 26 | def test_flatten_and_upsert_uses_base_models() -> None: 27 | """Ensure metadata serialization accepts BaseModel and preserves lists.""" 28 | m = _Meta(source="doc", tags=["a", "b"]) 29 | collection = _FakeCollection() 30 | 31 | chroma.upsert(collection, ids=["1"], documents=["text"], metadatas=[m]) 32 | 33 | assert collection.calls 34 | ids, docs, metas = collection.calls[0] 35 | assert ids == ["1"] 36 | assert docs == ["text"] 37 | assert metas == [{"source": "doc", "tags": ["a", "b"]}] 38 | -------------------------------------------------------------------------------- /tests/rag/test_store.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG store.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import MagicMock, patch 5 | 6 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 7 | from agent_cli.core import chroma 8 | from agent_cli.rag import _store 9 | 10 | 11 | def test_init_collection(tmp_path: Path) -> None: 12 | """Test collection initialization.""" 13 | with ( 14 | patch("chromadb.PersistentClient") as mock_client, 15 | 
patch("agent_cli.core.chroma.embedding_functions.OpenAIEmbeddingFunction") as mock_openai, 16 | ): 17 | chroma.init_collection( 18 | tmp_path, 19 | name="docs", 20 | embedding_model=DEFAULT_OPENAI_EMBEDDING_MODEL, 21 | ) 22 | 23 | mock_client.assert_called_once() 24 | mock_openai.assert_called_once() 25 | mock_client.return_value.get_or_create_collection.assert_called_once() 26 | 27 | 28 | def test_delete_by_file_path() -> None: 29 | """Test deleting by file path.""" 30 | mock_collection = MagicMock() 31 | _store.delete_by_file_path(mock_collection, "path/to/file") 32 | mock_collection.delete.assert_called_with(where={"file_path": "path/to/file"}) 33 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: pytest 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | env: 9 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest, macos-latest, windows-latest] 18 | python-version: ["3.11", "3.13"] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install portaudio (Linux only) 27 | if: matrix.os == 'ubuntu-latest' 28 | run: sudo apt-get update && sudo apt-get install -y portaudio19-dev 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v6 31 | - name: Run pytest 32 | run: uv run --all-extras pytest -vvv 33 | - name: Upload coverage reports to Codecov 34 | if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' 35 | uses: codecov/codecov-action@v5 36 | env: 37 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 38 | -------------------------------------------------------------------------------- /tests/test_env_vars.py: 
-------------------------------------------------------------------------------- 1 | """Test that environment variables are correctly mapped to CLI options.""" 2 | 3 | import os 4 | import re 5 | from unittest import mock 6 | 7 | from typer.testing import CliRunner 8 | 9 | from agent_cli.cli import app 10 | 11 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 12 | 13 | 14 | def test_openai_base_url_env_var() -> None: 15 | """Test that OPENAI_BASE_URL environment variable sets the openai_base_url option.""" 16 | env_vars = {"OPENAI_BASE_URL": "http://test"} 17 | 18 | with ( 19 | mock.patch.dict(os.environ, env_vars), 20 | mock.patch("agent_cli.agents.autocorrect._async_autocorrect"), 21 | ): 22 | # We use --print-args to see what the CLI parsed. 23 | # We need to provide a dummy text argument so it doesn't try to read clipboard if it's empty/fails. 24 | result = runner.invoke(app, ["autocorrect", "--print-args", "dummy text"]) 25 | 26 | assert result.exit_code == 0 27 | # Strip ANSI codes 28 | clean_output = re.sub(r"\x1b\[[0-9;]*m", "", result.stdout) 29 | 30 | # Check if openai_base_url matches the env var 31 | assert "openai_base_url" in clean_output 32 | assert "http://test" in clean_output 33 | -------------------------------------------------------------------------------- /tests/test_llm_gemini.py: -------------------------------------------------------------------------------- 1 | """Tests for the Gemini LLM provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | import pytest 6 | 7 | from agent_cli import config 8 | from agent_cli.constants import DEFAULT_OPENAI_MODEL 9 | from agent_cli.services.llm import create_llm_agent 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_create_llm_agent_with_gemini() -> None: 14 | """Test that the create_llm_agent function can build an agent with the Gemini provider.""" 15 | provider_cfg = config.ProviderSelection( 16 | llm_provider="gemini", 17 | asr_provider="wyoming", 18 | 
tts_provider="wyoming", 19 | ) 20 | gemini_cfg = config.GeminiLLM( 21 | llm_gemini_model="gemini-1.5-flash", 22 | gemini_api_key="test-key", 23 | ) 24 | ollama_cfg = config.Ollama( 25 | llm_ollama_model="gemma3:4b", 26 | llm_ollama_host="http://localhost:11434", 27 | ) 28 | openai_cfg = config.OpenAILLM( 29 | llm_openai_model=DEFAULT_OPENAI_MODEL, 30 | openai_api_key="test-key", 31 | ) 32 | 33 | agent = create_llm_agent( 34 | provider_cfg=provider_cfg, 35 | ollama_cfg=ollama_cfg, 36 | openai_cfg=openai_cfg, 37 | gemini_cfg=gemini_cfg, 38 | ) 39 | assert agent is not None 40 | -------------------------------------------------------------------------------- /agent_cli/rag/models.py: -------------------------------------------------------------------------------- 1 | """RAG data models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class Message(BaseModel): 9 | """Chat message model.""" 10 | 11 | role: str 12 | content: str 13 | 14 | 15 | class ChatRequest(BaseModel): 16 | """Chat completion request model.""" 17 | 18 | model_config = ConfigDict(extra="allow") 19 | 20 | model: str 21 | messages: list[Message] 22 | temperature: float | None = 0.7 23 | max_tokens: int | None = 1000 24 | stream: bool | None = False 25 | rag_top_k: int | None = None 26 | rag_enable_tools: bool | None = True 27 | 28 | 29 | class DocMetadata(BaseModel): 30 | """Metadata for an indexed document chunk.""" 31 | 32 | source: str 33 | file_path: str 34 | file_type: str 35 | chunk_id: int 36 | total_chunks: int 37 | indexed_at: str 38 | file_hash: str 39 | file_mtime: float 40 | 41 | 42 | class RagSource(BaseModel): 43 | """Source information for RAG response.""" 44 | 45 | source: str 46 | path: str 47 | chunk_id: int 48 | score: float 49 | 50 | 51 | class RetrievalResult(BaseModel): 52 | """Result of a RAG retrieval operation.""" 53 | 54 | context: str 55 | sources: list[RagSource] 56 | 
-------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | """Tests for the tools.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import patch 7 | 8 | from agent_cli._tools import execute_code, read_file 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | 14 | def test_read_file_tool(tmp_path: Path) -> None: 15 | """Test the ReadFileTool.""" 16 | # 1. Test reading a file that exists 17 | file = tmp_path / "test.txt" 18 | file.write_text("hello") 19 | assert read_file(path=str(file)) == "hello" 20 | 21 | # 2. Test reading a file that does not exist 22 | assert "Error: File not found" in read_file(path="non_existent_file.txt") 23 | 24 | # 3. Test OSError 25 | with patch("pathlib.Path.read_text", side_effect=OSError("Test error")): 26 | assert "Error reading file" in read_file(path=str(file)) 27 | 28 | 29 | def test_execute_code_tool() -> None: 30 | """Test the ExecuteCodeTool.""" 31 | # 1. Test a simple command 32 | assert execute_code(code="echo hello").strip() == "hello" 33 | 34 | # 2. Test a command that fails 35 | assert "Error: Command not found" in execute_code(code="non_existent_command") 36 | 37 | # 3. 
Test a command that returns a non-zero exit code 38 | assert "Error executing code" in execute_code(code="ls non_existent_file") 39 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-transcription.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli transcription on macOS 4 | 5 | NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier} 6 | RECORDING_GROUP="agent-cli-transcribe-recording" 7 | TEMP_PREFIX="agent-cli-transcribe-temp" 8 | 9 | notify_temp() { 10 | local title=$1 11 | local message=$2 12 | local duration=${3:-4} # 4 seconds default 13 | local group="${TEMP_PREFIX}-${RANDOM}-$$" 14 | 15 | "$NOTIFIER" -title "$title" -message "$message" -group "$group" 16 | ( 17 | sleep "$duration" 18 | "$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true 19 | ) & 20 | } 21 | 22 | if pgrep -f "agent-cli transcribe" > /dev/null; then 23 | pkill -INT -f "agent-cli transcribe" 24 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 25 | notify_temp "🛑 Stopped" "Processing results..." 26 | else 27 | "$NOTIFIER" -title "🎙️ Started" -message "Listening..." -group "$RECORDING_GROUP" 28 | ( 29 | OUTPUT=$("$HOME/.local/bin/agent-cli" transcribe --llm --quiet 2>/dev/null) 30 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 31 | if [ -n "$OUTPUT" ]; then 32 | notify_temp "📄 Result" "$OUTPUT" 33 | else 34 | notify_temp "❌ Error" "No output" 35 | fi 36 | ) & 37 | fi 38 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/README.md: -------------------------------------------------------------------------------- 1 | # macOS Hotkeys 2 | 3 | System-wide hotkeys for agent-cli voice AI features on macOS. 
4 | 5 | ## Setup 6 | 7 | ```bash 8 | ./setup-macos-hotkeys.sh 9 | ``` 10 | 11 | ## Usage 12 | 13 | - **`Cmd+Shift+R`** → Toggle voice transcription (start/stop with result) 14 | - **`Cmd+Shift+A`** → Autocorrect clipboard text 15 | - **`Cmd+Shift+V`** → Toggle voice edit mode for clipboard 16 | 17 | Results appear in notifications and clipboard. 18 | 19 | > **Tip:** For a persistent “Listening…” indicator, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions). 20 | > Also enable "Allow notification when mirroring or sharing the display". 21 | > The scripts keep that alert pinned while dismissing status/result notifications automatically. 22 | 23 | ## What it installs 24 | 25 | - **skhd**: Hotkey manager 26 | - **terminal-notifier**: Notifications 27 | - **Configuration**: Automatic setup 28 | 29 | ## Troubleshooting 30 | 31 | **Hotkey not working?** 32 | - Grant accessibility permissions in System Settings 33 | 34 | **No notifications?** 35 | ```bash 36 | terminal-notifier -title "Test" -message "Hello" 37 | ``` 38 | 39 | **Services not running?** 40 | ```bash 41 | ./start-all-services.sh 42 | ``` 43 | 44 | That's it! System-wide hotkeys for agent-cli on macOS. 
45 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-voice-edit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli voice-edit on macOS 4 | 5 | NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier} 6 | RECORDING_GROUP="agent-cli-voice-edit-recording" 7 | TEMP_PREFIX="agent-cli-voice-edit-temp" 8 | 9 | notify_temp() { 10 | local title=$1 11 | local message=$2 12 | local duration=${3:-4} # 4 seconds default 13 | local group="${TEMP_PREFIX}-${RANDOM}-$$" 14 | 15 | "$NOTIFIER" -title "$title" -message "$message" -group "$group" 16 | ( 17 | sleep "$duration" 18 | "$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true 19 | ) & 20 | } 21 | 22 | if pgrep -f "agent-cli voice-edit" > /dev/null; then 23 | pkill -INT -f "agent-cli voice-edit" 24 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 25 | notify_temp "🛑 Stopped" "Processing voice command..." 26 | else 27 | "$NOTIFIER" -title "🎙️ Started" -message "Listening for voice command..." -group "$RECORDING_GROUP" 28 | ( 29 | OUTPUT=$("$HOME/.local/bin/agent-cli" voice-edit --quiet 2>/dev/null) 30 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 31 | if [ -n "$OUTPUT" ]; then 32 | notify_temp "✨ Voice Edit Result" "$OUTPUT" 33 | else 34 | notify_temp "❌ Error" "No output" 35 | fi 36 | ) & 37 | fi 38 | -------------------------------------------------------------------------------- /scripts/run-whisper-linux.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Linux: faster-whisper with CUDA/CPU detection 3 | echo "🎤 Starting Wyoming Faster Whisper on port 10300..." 
4 | 5 | # Detect if CUDA is available 6 | if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then 7 | echo "⚡ NVIDIA GPU detected" 8 | DETECTED_DEVICE="cuda" 9 | else 10 | echo "💻 No GPU detected or CUDA unavailable" 11 | DETECTED_DEVICE="cpu" 12 | fi 13 | 14 | # Allow device override via environment variable 15 | DEVICE="${WHISPER_DEVICE:-$DETECTED_DEVICE}" 16 | 17 | # Set default model based on final device choice 18 | if [ "$DEVICE" = "cuda" ]; then 19 | DEFAULT_MODEL="large-v3" 20 | else 21 | DEFAULT_MODEL="tiny" 22 | fi 23 | 24 | # Allow model override via environment variable 25 | MODEL="${WHISPER_MODEL:-$DEFAULT_MODEL}" 26 | echo "📦 Using model: $MODEL on device: $DEVICE" 27 | 28 | # Create .runtime directory for whisper data 29 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 30 | mkdir -p "$SCRIPT_DIR/.runtime" 31 | 32 | uvx --python 3.12 \ 33 | --from git+https://github.com/rhasspy/wyoming-faster-whisper.git@v3.0.1 \ 34 | wyoming-faster-whisper \ 35 | --model "$MODEL" \ 36 | --language en \ 37 | --device "$DEVICE" \ 38 | --uri 'tcp://0.0.0.0:10300' \ 39 | --data-dir "$SCRIPT_DIR/.runtime/whisper-data" \ 40 | --download-dir "$SCRIPT_DIR/.runtime/whisper-data" 41 | -------------------------------------------------------------------------------- /agent_cli/memory/entities.py: -------------------------------------------------------------------------------- 1 | """Domain entities for the memory system. 2 | 3 | These models represent the "Truth" of the system with strict validation. 4 | Unlike the storage models (files/DB), these entities do not have optional fields 5 | where they shouldn't. 
6 | """ 7 | 8 | from __future__ import annotations 9 | 10 | from datetime import datetime # noqa: TC003 11 | from typing import Literal 12 | 13 | from pydantic import BaseModel, Field 14 | 15 | 16 | class Turn(BaseModel): 17 | """A single user or assistant message in the conversation.""" 18 | 19 | id: str = Field(..., description="Unique UUID for this turn") 20 | conversation_id: str 21 | role: Literal["user", "assistant"] 22 | content: str 23 | created_at: datetime 24 | 25 | 26 | class Fact(BaseModel): 27 | """An atomic piece of information extracted from a user message.""" 28 | 29 | id: str = Field(..., description="Unique UUID for this fact") 30 | conversation_id: str 31 | content: str 32 | source_id: str = Field(..., description="UUID of the Turn this fact was extracted from") 33 | created_at: datetime 34 | # Facts are always role="memory" implicitly in the storage layer 35 | 36 | 37 | class Summary(BaseModel): 38 | """The rolling summary of a conversation.""" 39 | 40 | conversation_id: str 41 | content: str 42 | created_at: datetime 43 | # Summaries are role="summary" implicitly 44 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-autocorrect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli autocorrect on Linux 4 | # 5 | # This script corrects text from clipboard using AI: 6 | # - Reads text from clipboard 7 | # - Processes it with LLM for grammar/spelling corrections 8 | # - Displays the corrected result 9 | # 10 | # Works across different Linux desktop environments 11 | 12 | # Function to send notification 13 | notify() { 14 | local title="$1" 15 | local message="$2" 16 | local timeout="${3:-3000}" 17 | 18 | if command -v notify-send &> /dev/null; then 19 | notify-send -t "$timeout" "$title" "$message" 20 | elif command -v dunstify &> /dev/null; then 21 | dunstify -t "$timeout" "$title" "$message" 22 
| else 23 | echo "$title: $message" 24 | fi 25 | } 26 | 27 | # Function to sync clipboard (Wayland) 28 | sync_clipboard() { 29 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 30 | wl-paste | wl-copy -p 2>/dev/null || true 31 | fi 32 | } 33 | 34 | # Ensure agent-cli is in PATH 35 | export PATH="$PATH:$HOME/.local/bin" 36 | 37 | notify "📝 Autocorrect" "Processing clipboard text..." 38 | 39 | OUTPUT=$(agent-cli autocorrect --quiet 2>/dev/null) && { 40 | # Sync clipboard to primary selection (Wayland) 41 | sync_clipboard 42 | notify "✅ Corrected" "$OUTPUT" 5000 43 | } || { 44 | notify "❌ Error" "No text to correct or processing failed" 3000 45 | } 46 | -------------------------------------------------------------------------------- /agent_cli/memory/_streaming.py: -------------------------------------------------------------------------------- 1 | """Streaming helpers for chat completions.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import httpx 8 | 9 | from agent_cli.core.sse import extract_content_from_chunk, parse_chunk 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import AsyncGenerator 13 | 14 | 15 | async def stream_chat_sse( 16 | *, 17 | openai_base_url: str, 18 | payload: dict[str, Any], 19 | headers: dict[str, str] | None = None, 20 | request_timeout: float = 120.0, 21 | ) -> AsyncGenerator[str, None]: 22 | """Stream Server-Sent Events from an OpenAI-compatible chat completion endpoint.""" 23 | url = f"{openai_base_url.rstrip('/')}/chat/completions" 24 | async with ( 25 | httpx.AsyncClient(timeout=request_timeout) as client, 26 | client.stream("POST", url, json=payload, headers=headers) as response, 27 | ): 28 | if response.status_code != 200: # noqa: PLR2004 29 | error_text = await response.aread() 30 | yield f"data: {error_text.decode(errors='ignore')}\n\n" 31 | return 32 | async for line in response.aiter_lines(): 33 | if line: 34 | yield line 35 | 36 | 37 | def 
accumulate_assistant_text(line: str, buffer: list[str]) -> None: 38 | """Parse SSE line and append any assistant text delta into buffer.""" 39 | chunk = parse_chunk(line) 40 | if chunk is None: 41 | return 42 | piece = extract_content_from_chunk(chunk) 43 | if piece: 44 | buffer.append(piece) 45 | -------------------------------------------------------------------------------- /agent_cli/rag/_store.py: -------------------------------------------------------------------------------- 1 | """ChromaDB functional interface.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from agent_cli.core.chroma import delete_where, upsert 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | 13 | from chromadb import Collection 14 | 15 | from agent_cli.rag.models import DocMetadata 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | def upsert_docs( 21 | collection: Collection, 22 | ids: list[str], 23 | documents: list[str], 24 | metadatas: Sequence[DocMetadata], 25 | ) -> None: 26 | """Upsert documents into the collection.""" 27 | upsert(collection, ids=ids, documents=documents, metadatas=metadatas) 28 | 29 | 30 | def delete_by_file_path(collection: Collection, file_path: str) -> None: 31 | """Delete all chunks associated with a file path.""" 32 | delete_where(collection, {"file_path": file_path}) 33 | 34 | 35 | def query_docs(collection: Collection, text: str, n_results: int) -> dict[str, Any]: 36 | """Query the collection.""" 37 | return collection.query(query_texts=[text], n_results=n_results) 38 | 39 | 40 | def get_all_metadata(collection: Collection) -> list[dict[str, Any]]: 41 | """Retrieve all metadata from the collection.""" 42 | result = collection.get(include=["metadatas"]) 43 | return result.get("metadatas", []) or [] # type: ignore[return-value] 44 | 45 | 46 | def count_docs(collection: Collection) -> int: 47 | """Return total number of documents.""" 48 | return 
collection.count() 49 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | ollama: 3 | build: 4 | context: .. 5 | dockerfile: docker/Dockerfile 6 | ports: 7 | - "11434:11434" 8 | environment: 9 | - OLLAMA_THINK=false 10 | volumes: 11 | - ./ollama:/root/.ollama 12 | # On macOS, Docker does not support GPU acceleration. For better 13 | # performance, it is recommended to install Ollama natively: 14 | # https://ollama.com/download 15 | # 16 | # On Linux with an NVIDIA GPU, you can uncomment the following 17 | # lines to enable GPU acceleration. 18 | # deploy: 19 | # resources: 20 | # reservations: 21 | # devices: 22 | # - driver: nvidia 23 | # count: all 24 | # capabilities: [gpu] 25 | 26 | piper: 27 | image: rhasspy/wyoming-piper 28 | ports: 29 | - "10200:10200" 30 | volumes: 31 | - ./piper-data:/data 32 | command: --voice en-us-ryan-high 33 | 34 | whisper: 35 | image: rhasspy/wyoming-whisper 36 | ports: 37 | - "10300:10300" 38 | volumes: 39 | - ./whisper-data:/data 40 | command: --model large-v3 --language en 41 | # The official rhasspy/wyoming-whisper image does not currently 42 | # support GPU acceleration. 
43 | # see https://github.com/rhasspy/wyoming-faster-whisper/issues/35 44 | # and https://github.com/rhasspy/wyoming-faster-whisper/pull/44 45 | 46 | openwakeword: 47 | image: rhasspy/wyoming-openwakeword 48 | ports: 49 | - "10400:10400" 50 | volumes: 51 | - ./openwakeword-data:/data 52 | command: --preload-model ok_nabu --custom-model-dir /data 53 | -------------------------------------------------------------------------------- /tests/mocks/llm.py: -------------------------------------------------------------------------------- 1 | """Mock LLM agents and responses for testing.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | if TYPE_CHECKING: 8 | from collections.abc import Awaitable 9 | 10 | 11 | class MockLLMResult: 12 | """Mock result from LLM agent execution.""" 13 | 14 | def __init__(self, output: str) -> None: 15 | """Initialize mock result.""" 16 | self.output = output 17 | 18 | 19 | class MockLLMAgent: 20 | """Mock LLM agent for testing without real API calls.""" 21 | 22 | def __init__(self, responses: dict[str, str]) -> None: 23 | """Initialize mock agent. 
24 | 25 | Args: 26 | ---- 27 | responses: Mapping of input patterns to responses 28 | 29 | """ 30 | self.responses = responses 31 | self.call_history: list[dict[str, Any]] = [] 32 | 33 | def run(self, user_prompt: str) -> Awaitable[MockLLMResult]: 34 | """Mock execution of the agent.""" 35 | self.call_history.append({"user_prompt": user_prompt}) 36 | 37 | async def mock_run() -> MockLLMResult: 38 | response = self._get_response_for_prompt(user_prompt) 39 | return MockLLMResult(response) 40 | 41 | return mock_run() 42 | 43 | def _get_response_for_prompt(self, prompt: str) -> str: 44 | """Get appropriate response for the given prompt.""" 45 | prompt_lower = prompt.lower() 46 | for pattern, response in self.responses.items(): 47 | if pattern.lower() in prompt_lower: 48 | return response 49 | return self.responses.get("default", "Mock LLM response") 50 | -------------------------------------------------------------------------------- /agent_cli/memory/_tasks.py: -------------------------------------------------------------------------------- 1 | """Utilities for tracking background tasks in the memory proxy.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | import logging 7 | from typing import TYPE_CHECKING, Any 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Coroutine 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | _BACKGROUND_TASKS: set[asyncio.Task[Any]] = set() 15 | 16 | 17 | def _track_background(task: asyncio.Task[Any], label: str) -> asyncio.Task[Any]: 18 | """Track background tasks and surface failures.""" 19 | _BACKGROUND_TASKS.add(task) 20 | 21 | def _done_callback(done: asyncio.Task[Any]) -> None: 22 | _BACKGROUND_TASKS.discard(done) 23 | if done.cancelled(): 24 | LOGGER.debug("Background task %s cancelled", label) 25 | return 26 | exc = done.exception() 27 | if exc: 28 | LOGGER.exception("Background task %s failed", label, exc_info=exc) 29 | 30 | task.add_done_callback(_done_callback) 31 | return task 32 | 33 | 34 
| def run_in_background( 35 | coro: asyncio.Task[Any] | Coroutine[Any, Any, Any], 36 | label: str, 37 | ) -> asyncio.Task[Any]: 38 | """Create and track a background asyncio task.""" 39 | task = coro if isinstance(coro, asyncio.Task) else asyncio.create_task(coro) 40 | task.set_name(f"memory-{label}") 41 | return _track_background(task, label) 42 | 43 | 44 | async def wait_for_background_tasks() -> None: 45 | """Await any in-flight background tasks (useful in tests).""" 46 | while _BACKGROUND_TASKS: 47 | tasks = list(_BACKGROUND_TASKS) 48 | await asyncio.gather(*tasks, return_exceptions=False) 49 | -------------------------------------------------------------------------------- /.github/workflows/update-readme.yml: -------------------------------------------------------------------------------- 1 | name: Update README.md 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | update_readme: 11 | runs-on: macos-latest 12 | steps: 13 | - name: Check out repository 14 | uses: actions/checkout@v4 15 | with: 16 | persist-credentials: false 17 | fetch-depth: 0 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v5 21 | 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v6 24 | 25 | - name: Run markdown-code-runner 26 | env: 27 | TERM: dumb 28 | NO_COLOR: 1 29 | TERMINAL_WIDTH: 90 30 | run: | 31 | uvx --with . markdown-code-runner README.md 32 | sed -i '' 's/[[:space:]]*$//' README.md 33 | 34 | - name: Commit updated README.md 35 | id: commit 36 | run: | 37 | git add README.md 38 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 39 | git config --local user.name "github-actions[bot]" 40 | if git diff --quiet && git diff --staged --quiet; then 41 | echo "No changes in README.md, skipping commit." 
42 | echo "commit_status=skipped" >> $GITHUB_ENV 43 | else 44 | git commit -m "Update README.md" 45 | echo "commit_status=committed" >> $GITHUB_ENV 46 | fi 47 | 48 | - name: Push changes 49 | if: env.commit_status == 'committed' 50 | uses: ad-m/github-push-action@master 51 | with: 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | branch: ${{ github.head_ref }} 54 | -------------------------------------------------------------------------------- /tests/core/test_watch.py: -------------------------------------------------------------------------------- 1 | """Tests for shared watch helper.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import pytest 8 | from watchfiles import Change 9 | 10 | from agent_cli.core import watch as watch_mod 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_watch_directory_skips_hidden(tmp_path: Path) -> None: 18 | """Only non-hidden files trigger the handler.""" 19 | called: list[tuple[Change, Path]] = [] 20 | 21 | async def fake_awatch(_root: Path) -> Any: # type: ignore[override] 22 | yield { 23 | (Change.added, str(tmp_path / "visible.txt")), 24 | (Change.added, str(tmp_path / ".hidden.txt")), 25 | (Change.added, str(tmp_path / "sub/.nested")), 26 | (Change.deleted, str(tmp_path / "gone.txt")), 27 | } 28 | 29 | def handler(change: Change, path: Path) -> None: 30 | called.append((change, path)) 31 | 32 | tmp_path.mkdir(parents=True, exist_ok=True) 33 | (tmp_path / "visible.txt").touch() 34 | (tmp_path / "gone.txt").touch() 35 | (tmp_path / "sub").mkdir() 36 | 37 | # Patch awatch used inside watch_directory 38 | original = watch_mod.awatch 39 | watch_mod.awatch = fake_awatch # type: ignore[assignment] 40 | try: 41 | await watch_mod.watch_directory(tmp_path, handler) 42 | finally: 43 | watch_mod.awatch = original # type: ignore[assignment] 44 | 45 | seen_paths = {p.name for _, p in called} 46 | assert "visible.txt" in 
seen_paths 47 | assert "gone.txt" in seen_paths 48 | assert ".hidden.txt" not in seen_paths 49 | assert ".nested" not in seen_paths 50 | -------------------------------------------------------------------------------- /scripts/zellij_help.txt: -------------------------------------------------------------------------------- 1 | ╔═══════════════════════════════════════════════════════════════════╗ 2 | ║ Agent CLI Services ║ 3 | ╠═══════════════════════════════════════════════════════════════════╣ 4 | ║ ║ 5 | ║ 🔴 IMPORTANT: ║ 6 | ║ • Ctrl-O d → Detach (keeps services running in background!) ║ 7 | ║ • Ctrl-Q → Quit (STOPS all services!) ║ 8 | ║ ║ 9 | ║ To reattach later: $ zellij attach agent-cli ║ 10 | ║ ║ 11 | ╠═══════════════════════════════════════════════════════════════════╣ 12 | ║ ║ 13 | ║ Services Running: ║ 14 | ║ • Ollama (LLM) - Port 11434 ║ 15 | ║ • Whisper (STT) - Port 10300 ║ 16 | ║ • Piper (TTS) - Port 10200 ║ 17 | ║ • OpenWakeWord - Port 10400 ║ 18 | ║ ║ 19 | ║ Navigation: ║ 20 | ║ • Alt + ← → ↑ ↓ - Move between panes ║ 21 | ║ • Ctrl-F - Toggle this help ║ 22 | ║ • q - Close this help ║ 23 | ║ ║ 24 | ╚═══════════════════════════════════════════════════════════════════╝ 25 | -------------------------------------------------------------------------------- /tests/memory/test_api_health.py: -------------------------------------------------------------------------------- 1 | """Smoke tests for memory API health and lifecycle.""" 2 | 3 | from __future__ import annotations 4 | 5 | from contextlib import ExitStack 6 | from typing import Any 7 | from unittest.mock import patch 8 | 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 12 | from agent_cli.memory import api as memory_api 13 | 14 | 15 | class _FakeCollection: 16 | pass 17 | 18 | 19 | def test_memory_health_and_startup_shutdown(tmp_path: Any) -> None: 20 | started: list[str] = [] 21 | 22 | async def _noop_watch(*_args: Any, **_kwargs: 
Any) -> None: 23 | started.append("watch") 24 | 25 | with ExitStack() as stack: 26 | stack.enter_context( 27 | patch("agent_cli.memory.client.watch_memory_store", side_effect=_noop_watch), 28 | ) 29 | stack.enter_context( 30 | patch("agent_cli.memory.client.init_memory_collection", return_value=_FakeCollection()), 31 | ) 32 | stack.enter_context( 33 | patch("agent_cli.memory.client.get_reranker_model", return_value=None), 34 | ) 35 | 36 | app = memory_api.create_app( 37 | memory_path=tmp_path, 38 | openai_base_url="http://mock-llm", 39 | embedding_model=DEFAULT_OPENAI_EMBEDDING_MODEL, 40 | enable_summarization=False, 41 | ) 42 | with TestClient(app) as client: 43 | resp = client.get("/health") 44 | assert resp.status_code == 200 45 | body = resp.json() 46 | assert body["status"] == "ok" 47 | assert body["memory_store"] == str(tmp_path.resolve()) 48 | 49 | # startup/shutdown should have triggered watch task creation 50 | assert started 51 | -------------------------------------------------------------------------------- /tests/rag/test_retriever.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG retriever.""" 2 | 3 | from unittest.mock import MagicMock, patch 4 | 5 | from agent_cli.core import reranker 6 | from agent_cli.rag import _retriever 7 | 8 | 9 | def test_get_reranker_model_installed() -> None: 10 | """Test loading reranker when installed.""" 11 | with patch("agent_cli.core.reranker.OnnxCrossEncoder") as mock_ce: 12 | reranker.get_reranker_model() 13 | mock_ce.assert_called_once() 14 | 15 | 16 | def test_search_context() -> None: 17 | """Test searching context.""" 18 | mock_collection = MagicMock() 19 | mock_reranker = MagicMock() 20 | 21 | # Mock query results 22 | mock_collection.query.return_value = { 23 | "documents": [["doc1", "doc2"]], 24 | "metadatas": [ 25 | [ 26 | {"source": "s1", "file_path": "p1", "chunk_id": 0}, 27 | {"source": "s2", "file_path": "p2", "chunk_id": 1}, 28 | ], 29 | ], 30 | } 
31 | 32 | # Mock reranker scores 33 | mock_reranker.predict.return_value = [-1.0, 5.0] 34 | 35 | result = _retriever.search_context(mock_collection, mock_reranker, "query", top_k=1) 36 | 37 | # Should return doc2 because it has higher score 38 | assert "doc2" in result.context 39 | assert "doc1" not in result.context 40 | assert len(result.sources) == 1 41 | assert result.sources[0].path == "p2" 42 | 43 | 44 | def test_search_context_empty() -> None: 45 | """Test search with no results.""" 46 | mock_collection = MagicMock() 47 | mock_reranker = MagicMock() 48 | 49 | mock_collection.query.return_value = {"documents": []} 50 | 51 | result = _retriever.search_context(mock_collection, mock_reranker, "query") 52 | 53 | assert result.context == "" 54 | assert result.sources == [] 55 | -------------------------------------------------------------------------------- /agent_cli/install/hotkeys.py: -------------------------------------------------------------------------------- 1 | """Hotkey installation commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | import platform 6 | 7 | from agent_cli.cli import app 8 | from agent_cli.core.utils import print_with_style 9 | from agent_cli.install.common import execute_installation_script, get_platform_script 10 | 11 | 12 | @app.command("install-hotkeys", rich_help_panel="Installation") 13 | def install_hotkeys() -> None: 14 | """Install system-wide hotkeys for agent-cli commands. 15 | 16 | Sets up the following hotkeys: 17 | 18 | macOS: 19 | - Cmd+Shift+R: Toggle voice transcription 20 | - Cmd+Shift+A: Autocorrect clipboard text 21 | - Cmd+Shift+V: Voice edit clipboard text 22 | 23 | Linux: 24 | - Super+Shift+R: Toggle voice transcription 25 | - Super+Shift+A: Autocorrect clipboard text 26 | - Super+Shift+V: Voice edit clipboard text 27 | 28 | Note: On macOS, you may need to grant Accessibility permissions to skhd 29 | in System Settings → Privacy & Security → Accessibility. 
30 | """ 31 | script_name = get_platform_script("setup-macos-hotkeys.sh", "setup-linux-hotkeys.sh") 32 | system = platform.system().lower() 33 | 34 | execute_installation_script( 35 | script_name=script_name, 36 | operation_name="Set up hotkeys", 37 | success_message="Hotkeys installed successfully!", 38 | ) 39 | 40 | # Post-installation steps for macOS 41 | if system == "darwin": 42 | print_with_style("\n⚠️ Important:", "yellow") 43 | print_with_style("If hotkeys don't work, grant Accessibility permissions:", "yellow") 44 | print_with_style( 45 | " 1. Open System Settings → Privacy & Security → Accessibility", 46 | "cyan", 47 | ) 48 | print_with_style(" 2. Add and enable 'skhd'", "cyan") 49 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Tests for the CLI.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import patch 7 | 8 | from typer.testing import CliRunner 9 | 10 | from agent_cli.cli import app 11 | 12 | if TYPE_CHECKING: 13 | import pytest 14 | 15 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 16 | 17 | 18 | def test_main_no_args() -> None: 19 | """Test the main function with no arguments.""" 20 | result = runner.invoke(app) 21 | assert "No command specified" in result.stdout 22 | assert "Usage" in result.stdout 23 | 24 | 25 | @patch("agent_cli.core.utils.setup_logging") 26 | def test_main_with_args(mock_setup_logging: pytest.MagicMock) -> None: 27 | """Test the main function with arguments.""" 28 | result = runner.invoke(app, ["--help"]) 29 | assert result.exit_code == 0 30 | assert "Usage" in result.stdout 31 | mock_setup_logging.assert_not_called() 32 | 33 | 34 | @patch("agent_cli.agents.server.run_server") 35 | def test_server_command(mock_run_server: pytest.MagicMock) -> None: 36 | """Test the server command.""" 37 | result = 
runner.invoke(app, ["server"]) 38 | assert result.exit_code == 0 39 | assert "Starting Agent CLI transcription server" in result.stdout 40 | mock_run_server.assert_called_once_with(host="0.0.0.0", port=61337, reload=False) # noqa: S104 41 | 42 | 43 | @patch("agent_cli.agents.server.run_server") 44 | def test_server_command_with_options(mock_run_server: pytest.MagicMock) -> None: 45 | """Test the server command with custom options.""" 46 | result = runner.invoke(app, ["server", "--host", "127.0.0.1", "--port", "8080", "--reload"]) 47 | assert result.exit_code == 0 48 | assert "Starting Agent CLI transcription server on 127.0.0.1:8080" in result.stdout 49 | assert "Auto-reload enabled for development" in result.stdout 50 | mock_run_server.assert_called_once_with(host="127.0.0.1", port=8080, reload=True) 51 | -------------------------------------------------------------------------------- /agent_cli/agents/server.py: -------------------------------------------------------------------------------- 1 | """FastAPI server command for Agent CLI.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.util import find_spec 6 | 7 | import typer 8 | 9 | from agent_cli import opts 10 | from agent_cli.cli import app 11 | from agent_cli.core.utils import ( 12 | console, 13 | print_command_line_args, 14 | print_error_message, 15 | ) 16 | 17 | has_uvicorn = find_spec("uvicorn") is not None 18 | has_fastapi = find_spec("fastapi") is not None 19 | 20 | 21 | def run_server( 22 | host: str = "0.0.0.0", # noqa: S104 23 | port: int = 61337, 24 | reload: bool = False, 25 | ) -> None: 26 | """Run the FastAPI server.""" 27 | import uvicorn # noqa: PLC0415 28 | 29 | uvicorn.run( 30 | "agent_cli.api:app", 31 | host=host, 32 | port=port, 33 | reload=reload, 34 | log_level="info", 35 | ) 36 | 37 | 38 | @app.command("server") 39 | def server( 40 | host: str = typer.Option("0.0.0.0", help="Host to bind the server to"), # noqa: S104 41 | port: int = typer.Option(61337, help="Port to 
bind the server to"), 42 | reload: bool = typer.Option(False, "--reload", help="Enable auto-reload for development"), # noqa: FBT003 43 | config_file: str | None = opts.CONFIG_FILE, 44 | print_args: bool = opts.PRINT_ARGS, 45 | ) -> None: 46 | """Run the FastAPI transcription web server.""" 47 | if print_args: 48 | print_command_line_args(locals()) 49 | if not has_uvicorn or not has_fastapi: 50 | msg = "uvicorn or fastapi is not installed, please install it with `pip install fastapi[standard]` or `pip install agent-cli[server]`" 51 | print_error_message(msg) 52 | raise typer.Exit(1) 53 | console.print( 54 | f"[bold green]Starting Agent CLI transcription server on {host}:{port}[/bold green]", 55 | ) 56 | if reload: 57 | console.print("[yellow]Auto-reload enabled for development[/yellow]") 58 | run_server(host=host, port=port, reload=reload) 59 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-voice-edit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli voice-edit on Linux 4 | # 5 | # This script provides voice editing for clipboard text: 6 | # - First invocation: Starts voice editing in the background 7 | # - Second invocation: Stops voice editing and displays the result 8 | # 9 | # Works across different Linux desktop environments 10 | 11 | # Function to send notification 12 | notify() { 13 | local title="$1" 14 | local message="$2" 15 | local timeout="${3:-3000}" 16 | 17 | if command -v notify-send &> /dev/null; then 18 | notify-send -t "$timeout" "$title" "$message" 19 | elif command -v dunstify &> /dev/null; then 20 | dunstify -t "$timeout" "$title" "$message" 21 | else 22 | echo "$title: $message" 23 | fi 24 | } 25 | 26 | # Function to sync clipboard (Wayland) 27 | sync_clipboard() { 28 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 29 | wl-paste | wl-copy -p 2>/dev/null 
|| true 30 | fi 31 | } 32 | 33 | # Check if agent-cli voice-edit is already running 34 | if pgrep -f "agent-cli voice-edit" > /dev/null; then 35 | # Voice edit is running - stop it 36 | pkill -INT -f "agent-cli voice-edit" 37 | notify "🛑 Voice Edit Stopped" "Processing voice command..." 38 | else 39 | # Voice edit is not running - start it 40 | 41 | # Ensure agent-cli is in PATH 42 | export PATH="$PATH:$HOME/.local/bin" 43 | 44 | # Notify user that recording has started 45 | notify "🎙️ Voice Edit Started" "Listening for voice command..." 46 | 47 | # Start voice edit in background 48 | ( 49 | OUTPUT=$(agent-cli voice-edit --quiet 2>/dev/null) 50 | if [ -n "$OUTPUT" ]; then 51 | # Sync clipboard to primary selection (Wayland) 52 | sync_clipboard 53 | notify "✨ Voice Edit Result" "$OUTPUT" 5000 54 | else 55 | notify "❌ Error" "No output" 3000 56 | fi 57 | ) & 58 | fi 59 | -------------------------------------------------------------------------------- /scripts/setup-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "🚀 Setting up agent-cli services on macOS..." 6 | 7 | # Check if Homebrew is installed 8 | if ! command -v brew &> /dev/null; then 9 | echo "❌ Homebrew is not installed. Please install Homebrew first:" 10 | echo "/bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"" 11 | exit 1 12 | fi 13 | 14 | # Check if uv is installed 15 | if ! command -v uv &> /dev/null; then 16 | echo "📦 Installing uv..." 17 | brew install uv 18 | fi 19 | 20 | # Install Ollama 21 | echo "🧠 Checking Ollama..." 22 | if ! command -v ollama &> /dev/null; then 23 | echo "🍺 Installing Ollama via Homebrew..." 24 | brew install ollama 25 | echo "✅ Ollama installed successfully" 26 | else 27 | echo "✅ Ollama is already installed" 28 | fi 29 | 30 | # Check if zellij is installed 31 | if ! 
command -v zellij &> /dev/null; then 32 | echo "📺 Installing zellij..." 33 | brew install zellij 34 | fi 35 | 36 | # Install agent-cli 37 | echo "🤖 Installing/upgrading agent-cli..." 38 | uv tool install --upgrade agent-cli 39 | 40 | # Preload default Ollama model 41 | echo "⬇️ Preloading default Ollama model (gemma3:4b)..." 42 | echo "⏳ This may take a few minutes depending on your internet connection..." 43 | # Start Ollama in background, then pull model synchronously 44 | (ollama serve >/dev/null 2>&1 &) && sleep 2 && ollama pull gemma3:4b 45 | # Stop the temporary ollama server 46 | pkill -f "ollama serve" || true 47 | 48 | echo "" 49 | echo "✅ Setup complete! You can now run the services:" 50 | echo "" 51 | echo "Option 1 - Run all services at once:" 52 | echo " ./start-all-services.sh" 53 | echo "" 54 | echo "Option 2 - Run services individually:" 55 | echo " 1. Ollama: ollama serve" 56 | echo " 2. Whisper: ./run-whisper.sh" 57 | echo " 3. Piper: ./run-piper.sh" 58 | echo " 4. OpenWakeWord: ./run-openwakeword.sh" 59 | echo "" 60 | echo "🎉 agent-cli has been installed and is ready to use!" 
61 | -------------------------------------------------------------------------------- /tests/memory/test_files.py: -------------------------------------------------------------------------------- 1 | """Tests for file-backed memory helpers.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from agent_cli.memory import _files as mem_files 8 | from agent_cli.memory.models import MemoryMetadata 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | 14 | def test_write_and_read_memory_file_round_trip(tmp_path: Path) -> None: 15 | """Writes a memory file and reads it back with metadata intact.""" 16 | record = mem_files.write_memory_file( 17 | tmp_path, 18 | conversation_id="conv-1", 19 | role="memory", 20 | created_at="2025-01-01T00:00:00Z", 21 | content="fact about bikes", 22 | ) 23 | 24 | loaded = mem_files.read_memory_file(record.path) 25 | assert loaded is not None 26 | assert loaded.content == "fact about bikes" 27 | assert loaded.metadata.conversation_id == "conv-1" 28 | assert "facts" in loaded.path.parts 29 | 30 | 31 | def test_snapshot_round_trip(tmp_path: Path) -> None: 32 | """Snapshot JSON stores and restores memory records.""" 33 | meta = MemoryMetadata( 34 | conversation_id="c1", 35 | role="memory", 36 | created_at="now", 37 | ) 38 | rec = mem_files.MemoryFileRecord(id="1", path=tmp_path / "p.md", metadata=meta, content="hi") 39 | snapshot = tmp_path / "snap.json" 40 | 41 | mem_files.write_snapshot(snapshot, [rec]) 42 | loaded = mem_files.load_snapshot(snapshot) 43 | 44 | assert "1" in loaded 45 | assert loaded["1"].content == "hi" 46 | 47 | 48 | def test_load_memory_files_skips_invalid(tmp_path: Path) -> None: 49 | """Invalid files without front matter should be ignored.""" 50 | entries_dir = tmp_path / "entries" / "default" 51 | entries_dir.mkdir(parents=True, exist_ok=True) 52 | bad_file = entries_dir / "bad.md" 53 | bad_file.write_text("no front matter here", encoding="utf-8") 54 | 55 | records 
= mem_files.load_memory_files(tmp_path) 56 | assert records == [] 57 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-transcription.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli transcription on Linux 4 | # 5 | # This script provides a simple toggle mechanism for voice transcription: 6 | # - First invocation: Starts transcription in the background 7 | # - Second invocation: Stops transcription and displays the result 8 | # 9 | # Works across different Linux desktop environments 10 | 11 | # Function to send notification 12 | notify() { 13 | local title="$1" 14 | local message="$2" 15 | local timeout="${3:-3000}" 16 | 17 | if command -v notify-send &> /dev/null; then 18 | notify-send -t "$timeout" "$title" "$message" 19 | elif command -v dunstify &> /dev/null; then 20 | dunstify -t "$timeout" "$title" "$message" 21 | else 22 | echo "$title: $message" 23 | fi 24 | } 25 | 26 | # Function to sync clipboard (Wayland) 27 | sync_clipboard() { 28 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 29 | wl-paste | wl-copy -p 2>/dev/null || true 30 | fi 31 | } 32 | 33 | # Check if agent-cli transcribe is already running 34 | if pgrep -f "agent-cli transcribe" > /dev/null; then 35 | # Transcription is running - stop it 36 | pkill -INT -f "agent-cli transcribe" 37 | notify "🛑 Transcription Stopped" "Processing results..." 38 | else 39 | # Transcription is not running - start it 40 | 41 | # Ensure agent-cli is in PATH 42 | export PATH="$PATH:$HOME/.local/bin" 43 | 44 | # Notify user that recording has started 45 | notify "🎙️ Transcription Started" "Listening in background..." 
46 | 47 | # Start transcription in background 48 | ( 49 | OUTPUT=$(agent-cli transcribe --llm --quiet 2>/dev/null) 50 | if [ -n "$OUTPUT" ]; then 51 | # Sync clipboard to primary selection (Wayland) 52 | sync_clipboard 53 | notify "📄 Transcription Result" "$OUTPUT" 5000 54 | else 55 | notify "❌ Error" "No output" 3000 56 | fi 57 | ) & 58 | fi 59 | -------------------------------------------------------------------------------- /scripts/setup-macos-hotkeys.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "⌨️ Setting up macOS hotkeys..." 6 | 7 | # Check macOS 8 | if [[ "$(uname)" != "Darwin" ]]; then 9 | echo "❌ This script is for macOS only" 10 | exit 1 11 | fi 12 | 13 | # Install dependencies 14 | echo "📦 Installing dependencies..." 15 | if ! command -v brew &> /dev/null; then 16 | echo "🍺 Installing Homebrew..." 17 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" 18 | fi 19 | 20 | brew install terminal-notifier 21 | brew tap jackielii/tap && brew install jackielii/tap/skhd-zig 22 | 23 | # Setup configuration 24 | echo "⚙️ Setting up configuration..." 25 | mkdir -p ~/.config/skhd 26 | 27 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 28 | TRANSCRIBE_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-transcription.sh" 29 | AUTOCORRECT_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-autocorrect.sh" 30 | VOICE_EDIT_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-voice-edit.sh" 31 | 32 | cat > ~/.config/skhd/skhdrc << EOF 33 | # Agent-CLI Hotkeys 34 | cmd + shift - r : "$TRANSCRIBE_SCRIPT" 35 | cmd + shift - a : "$AUTOCORRECT_SCRIPT" 36 | cmd + shift - v : "$VOICE_EDIT_SCRIPT" 37 | EOF 38 | 39 | # Start service 40 | echo "🚀 Starting skhd..." 41 | skhd --start-service 42 | 43 | # Test 44 | echo "🧪 Testing..." 45 | terminal-notifier -title "⌨️ Setup Complete" -message "Agent-CLI hotkeys ready!" 46 | 47 | echo "" 48 | echo "✅ Done! 
Hotkeys:" 49 | echo " Cmd+Shift+R - Transcribe voice" 50 | echo " Cmd+Shift+A - Autocorrect clipboard" 51 | echo " Cmd+Shift+V - Voice edit clipboard" 52 | echo "" 53 | echo "If the hotkey doesn't work:" 54 | echo "1. Open System Settings → Privacy & Security → Accessibility" 55 | echo "2. Add and enable 'skhd'" 56 | echo "" 57 | echo "If the notification doesn't show:" 58 | echo "1. Open System Settings → Notifications" 59 | echo "2. Find 'terminal-notifier' and allow notifications" 60 | echo "3. Set Alert style to Persistent for better visibility" 61 | echo "4. Enable 'Allow notification when mirroring or sharing the display'" 62 | -------------------------------------------------------------------------------- /agent_cli/services/_wyoming_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for Wyoming protocol interactions to eliminate code duplication.""" 2 | 3 | from __future__ import annotations 4 | 5 | from contextlib import asynccontextmanager 6 | from typing import TYPE_CHECKING 7 | 8 | from wyoming.client import AsyncClient 9 | 10 | from agent_cli.core.utils import print_error_message 11 | 12 | if TYPE_CHECKING: 13 | import logging 14 | from collections.abc import AsyncGenerator 15 | 16 | 17 | @asynccontextmanager 18 | async def wyoming_client_context( 19 | server_ip: str, 20 | server_port: int, 21 | server_type: str, 22 | logger: logging.Logger, 23 | *, 24 | quiet: bool = False, 25 | ) -> AsyncGenerator[AsyncClient, None]: 26 | """Context manager for Wyoming client connections with unified error handling. 
27 | 28 | Args: 29 | server_ip: Wyoming server IP 30 | server_port: Wyoming server port 31 | server_type: Type of server (e.g., "ASR", "TTS", "wake word") 32 | logger: Logger instance 33 | quiet: If True, suppress console error messages 34 | 35 | Yields: 36 | Connected Wyoming client 37 | 38 | Raises: 39 | ConnectionRefusedError: If connection fails 40 | Exception: For other connection errors 41 | 42 | """ 43 | uri = f"tcp://{server_ip}:{server_port}" 44 | logger.info("Connecting to Wyoming %s server at %s", server_type, uri) 45 | 46 | try: 47 | async with AsyncClient.from_uri(uri) as client: 48 | logger.info("%s connection established", server_type) 49 | yield client 50 | except ConnectionRefusedError: 51 | logger.exception("%s connection refused.", server_type) 52 | if not quiet: 53 | print_error_message( 54 | f"{server_type} connection refused.", 55 | f"Is the Wyoming {server_type.lower()} server running at {uri}?", 56 | ) 57 | raise 58 | except Exception as e: 59 | logger.exception("An error occurred during %s connection", server_type.lower()) 60 | if not quiet: 61 | print_error_message(f"{server_type} error: {e}") 62 | raise 63 | -------------------------------------------------------------------------------- /agent_cli/core/watch.py: -------------------------------------------------------------------------------- 1 | """Shared watchfiles helper.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | from collections.abc import Callable 7 | from pathlib import Path 8 | 9 | from watchfiles import Change, awatch 10 | 11 | ChangeHandler = Callable[[Change, Path], None] 12 | PathFilter = Callable[[Path, Path], bool] 13 | 14 | 15 | def _default_skip_hidden(path: Path, root: Path) -> bool: 16 | """Default filter that skips hidden files and directories.""" 17 | rel_parts = path.relative_to(root).parts 18 | return any(part.startswith(".") for part in rel_parts) 19 | 20 | 21 | async def watch_directory( 22 | root: Path, 23 | handler: ChangeHandler, 24 
| *, 25 | skip_hidden: bool = True, 26 | ignore_filter: PathFilter | None = None, 27 | use_executor: bool = True, 28 | ) -> None: 29 | """Watch a directory for file changes and invoke handler(change, path). 30 | 31 | Args: 32 | root: The directory to watch. 33 | handler: Callback invoked with (change_type, path) for each file change. 34 | skip_hidden: If True, skip files/dirs starting with '.'. Ignored if 35 | ignore_filter is provided. 36 | ignore_filter: Optional custom filter function(path, root) -> bool. 37 | Returns True if the path should be ignored. Overrides skip_hidden. 38 | use_executor: If True, run handler in a thread pool executor. 39 | 40 | """ 41 | loop = asyncio.get_running_loop() 42 | 43 | # Determine which filter to use 44 | if ignore_filter is not None: 45 | should_skip = ignore_filter 46 | elif skip_hidden: 47 | should_skip = _default_skip_hidden 48 | else: 49 | should_skip = None 50 | 51 | async for changes in awatch(root): 52 | for change_type, file_path_str in changes: 53 | path = Path(file_path_str) 54 | if path.is_dir(): 55 | continue 56 | 57 | if should_skip is not None and should_skip(path, root): 58 | continue 59 | 60 | if use_executor: 61 | await loop.run_in_executor(None, handler, change_type, path) 62 | else: 63 | handler(change_type, path) 64 | -------------------------------------------------------------------------------- /tests/agents/test_tts_common_extra.py: -------------------------------------------------------------------------------- 1 | """Extra tests for the TTS common module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from unittest.mock import AsyncMock, MagicMock, patch 7 | 8 | import pytest 9 | 10 | from agent_cli import config 11 | from agent_cli.services.tts import _save_audio_file, handle_tts_playback 12 | 13 | 14 | @pytest.mark.asyncio 15 | @patch("agent_cli.services.tts.asyncio.to_thread") 16 | async def test_save_audio_file_os_error(mock_to_thread: AsyncMock) -> None: 17 | 
"""Test _save_audio_file with OSError.""" 18 | mock_to_thread.side_effect = OSError("Permission denied") 19 | 20 | await _save_audio_file( 21 | b"audio data", 22 | Path("test.wav"), 23 | quiet=False, 24 | logger=MagicMock(), 25 | ) 26 | 27 | mock_to_thread.assert_called_once() 28 | 29 | 30 | @pytest.mark.asyncio 31 | @patch("agent_cli.services.tts._speak_text", new_callable=AsyncMock) 32 | async def test_handle_tts_playback_os_error(mock_speak_text: AsyncMock) -> None: 33 | """Test handle_tts_playback with OSError.""" 34 | mock_speak_text.side_effect = OSError("Connection error") 35 | mock_live = MagicMock() 36 | 37 | provider_cfg = config.ProviderSelection( 38 | tts_provider="wyoming", 39 | asr_provider="wyoming", 40 | llm_provider="ollama", 41 | ) 42 | audio_out_cfg = config.AudioOutput(enable_tts=True) 43 | wyoming_tts_cfg = config.WyomingTTS(tts_wyoming_ip="localhost", tts_wyoming_port=1234) 44 | openai_tts_cfg = config.OpenAITTS(tts_openai_model="tts-1", tts_openai_voice="alloy") 45 | kokoro_tts_cfg = config.KokoroTTS( 46 | tts_kokoro_model="tts-1", 47 | tts_kokoro_voice="alloy", 48 | tts_kokoro_host="http://localhost:8000/v1", 49 | ) 50 | 51 | result = await handle_tts_playback( 52 | text="hello", 53 | provider_cfg=provider_cfg, 54 | audio_output_cfg=audio_out_cfg, 55 | wyoming_tts_cfg=wyoming_tts_cfg, 56 | openai_tts_cfg=openai_tts_cfg, 57 | kokoro_tts_cfg=kokoro_tts_cfg, 58 | save_file=None, 59 | quiet=False, 60 | logger=MagicMock(), 61 | live=mock_live, 62 | ) 63 | 64 | assert result is None 65 | -------------------------------------------------------------------------------- /agent_cli/rag/_indexer.py: -------------------------------------------------------------------------------- 1 | """File watcher and indexing logic using watchfiles.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING 7 | 8 | from watchfiles import Change 9 | 10 | from agent_cli.core.watch import watch_directory 11 | from 
agent_cli.rag._indexing import index_file, remove_file 12 | from agent_cli.rag._utils import should_ignore_path 13 | 14 | if TYPE_CHECKING: 15 | from pathlib import Path 16 | 17 | from chromadb import Collection 18 | 19 | LOGGER = logging.getLogger(__name__) 20 | 21 | 22 | async def watch_docs( 23 | collection: Collection, 24 | docs_folder: Path, 25 | file_hashes: dict[str, str], 26 | file_mtimes: dict[str, float], 27 | ) -> None: 28 | """Watch docs folder for changes and update index asynchronously.""" 29 | LOGGER.info("📁 Watching folder: %s", docs_folder) 30 | 31 | await watch_directory( 32 | docs_folder, 33 | lambda change, path: _handle_change( 34 | change, 35 | path, 36 | collection, 37 | docs_folder, 38 | file_hashes, 39 | file_mtimes, 40 | ), 41 | ignore_filter=should_ignore_path, 42 | ) 43 | 44 | 45 | def _handle_change( 46 | change: Change, 47 | file_path: Path, 48 | collection: Collection, 49 | docs_folder: Path, 50 | file_hashes: dict[str, str], 51 | file_mtimes: dict[str, float], 52 | ) -> None: 53 | try: 54 | if change == Change.deleted: 55 | LOGGER.info("[deleted] Removing from index: %s", file_path.name) 56 | remove_file(collection, docs_folder, file_path, file_hashes, file_mtimes) 57 | return 58 | if change in {Change.added, Change.modified} and file_path.is_file(): 59 | action = "created" if change == Change.added else "modified" 60 | LOGGER.info("[%s] Indexing: %s", action, file_path.name) 61 | index_file(collection, docs_folder, file_path, file_hashes, file_mtimes) 62 | except (OSError, UnicodeDecodeError): 63 | LOGGER.warning("Watcher handler transient IO error for %s", file_path, exc_info=True) 64 | except Exception: 65 | LOGGER.exception("Watcher handler failed for %s", file_path) 66 | raise 67 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/README.md: -------------------------------------------------------------------------------- 1 | # Linux Hotkeys 2 | 3 | System-wide hotkeys for 
agent-cli voice AI features on Linux. 4 | 5 | ## Setup 6 | 7 | ```bash 8 | ./setup-linux-hotkeys.sh 9 | ``` 10 | 11 | The setup script will: 12 | 1. Install notification support if missing 13 | 2. Show you the exact hotkey bindings to add to your desktop environment 14 | 3. Provide copy-paste ready configuration for popular desktop environments 15 | 16 | ## Usage 17 | 18 | - **`Super+Shift+R`** → Toggle voice transcription (start/stop with result) 19 | - **`Super+Shift+A`** → Autocorrect clipboard text 20 | - **`Super+Shift+V`** → Toggle voice edit mode for clipboard 21 | 22 | Results appear in notifications and clipboard. 23 | 24 | ## Desktop Environment Support 25 | 26 | The setup script provides copy-paste ready instructions for: 27 | 28 | - **Hyprland**: Add bindings to `~/.config/hypr/hyprland.conf` 29 | - **Sway**: Add bindings to `~/.config/sway/config` 30 | - **i3**: Add bindings to `~/.config/i3/config` 31 | - **GNOME**: Use Settings → Keyboard → Custom Shortcuts 32 | - **KDE**: Use System Settings → Shortcuts → Custom Shortcuts 33 | - **XFCE**: Use Settings Manager → Keyboard → Application Shortcuts 34 | - **Other**: Manual hotkey configuration in your desktop environment 35 | 36 | ## Features 37 | 38 | - **Manual configuration**: Simple setup with clear instructions for each desktop environment 39 | - **Wayland support**: Includes clipboard syncing for Wayland compositors 40 | - **Fallback notifications**: Uses `notify-send`, `dunstify`, or console output 41 | - **Error handling**: Shows notifications for both success and failure cases 42 | - **PATH handling**: Scripts automatically find agent-cli installation 43 | 44 | ## Troubleshooting 45 | 46 | **Hotkeys not working?** 47 | - Check your desktop's keyboard shortcut settings for conflicts 48 | - Make sure you added the bindings to your desktop environment's config 49 | - Verify the script paths are correct 50 | 51 | **No notifications?** 52 | ```bash 53 | sudo apt install libnotify-bin # Ubuntu/Debian 
54 | sudo dnf install libnotify # Fedora/RHEL 55 | sudo pacman -S libnotify # Arch 56 | ``` 57 | 58 | **Services not running?** 59 | ```bash 60 | ./start-all-services.sh 61 | ``` 62 | 63 | That's it! System-wide hotkeys for agent-cli on Linux. 64 | -------------------------------------------------------------------------------- /agent_cli/memory/_filters.py: -------------------------------------------------------------------------------- 1 | """Filter conversion utilities for ChromaDB.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | 8 | def _convert_condition(key: str, value: Any) -> dict[str, Any] | None: 9 | """Convert a single filter condition to ChromaDB format.""" 10 | if isinstance(value, dict): 11 | # Operator dict: {"gte": 10} → {"$gte": 10} 12 | for op, val in value.items(): 13 | chroma_op = f"${op}" if not op.startswith("$") else op 14 | return {key: {chroma_op: val}} 15 | return None 16 | # Simple equality 17 | return {key: {"$eq": value}} 18 | 19 | 20 | def _process_or(conditions: list[dict[str, Any]]) -> dict[str, Any] | None: 21 | """Process $or conditions.""" 22 | or_conditions = [] 23 | for cond in conditions: 24 | for sub_key, sub_val in cond.items(): 25 | converted = _convert_condition(sub_key, sub_val) 26 | if converted: 27 | or_conditions.append(converted) 28 | if len(or_conditions) > 1: 29 | return {"$or": or_conditions} 30 | if or_conditions: 31 | return or_conditions[0] 32 | return None 33 | 34 | 35 | def to_chroma_where(filters: dict[str, Any] | None) -> dict[str, Any] | None: 36 | """Convert universal filter format to ChromaDB WHERE clause. 
37 | 38 | Supports: 39 | - Simple equality: {"role": "user"} → {"role": {"$eq": "user"}} 40 | - Operators: {"created_at": {"gte": "2024-01-01"}} → {"created_at": {"$gte": "2024-01-01"}} 41 | - Logical OR: {"$or": [{"role": "user"}, {"role": "assistant"}]} 42 | 43 | Operators: eq, ne, gt, gte, lt, lte, in, nin 44 | """ 45 | if not filters: 46 | return None 47 | 48 | processed: list[dict[str, Any]] = [] 49 | for key, value in filters.items(): 50 | if key == "$or": 51 | or_result = _process_or(value) 52 | if or_result: 53 | processed.append(or_result) 54 | elif not key.startswith("$"): 55 | converted = _convert_condition(key, value) 56 | if converted: 57 | processed.append(converted) 58 | 59 | if not processed: 60 | return None 61 | if len(processed) == 1: 62 | return processed[0] 63 | return {"$and": processed} 64 | -------------------------------------------------------------------------------- /tests/test_wyoming_utils.py: -------------------------------------------------------------------------------- 1 | """Tests for the Wyoming utilities.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from unittest.mock import AsyncMock, MagicMock, patch 7 | 8 | import pytest 9 | from wyoming.client import AsyncClient 10 | 11 | from agent_cli.services._wyoming_utils import wyoming_client_context 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_wyoming_client_context_success(): 16 | """Test that the Wyoming client context manager connects successfully.""" 17 | mock_client = AsyncMock(spec=AsyncClient) 18 | with patch( 19 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 20 | return_value=MagicMock( 21 | __aenter__=AsyncMock(return_value=mock_client), 22 | __aexit__=AsyncMock(return_value=None), 23 | ), 24 | ): 25 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()) as client: 26 | assert client is mock_client 27 | 28 | 29 | @pytest.mark.asyncio 30 | async def test_wyoming_client_context_connection_refused( 
31 | caplog: pytest.LogCaptureFixture, 32 | ): 33 | """Test that a ConnectionRefusedError is handled correctly.""" 34 | with ( 35 | patch( 36 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 37 | side_effect=ConnectionRefusedError, 38 | ), 39 | pytest.raises(ConnectionRefusedError), 40 | ): 41 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()): 42 | pass # This part should not be reached 43 | 44 | assert "Test connection refused" in caplog.text 45 | 46 | 47 | @pytest.mark.asyncio 48 | async def test_wyoming_client_context_generic_exception( 49 | caplog: pytest.LogCaptureFixture, 50 | ): 51 | """Test that a generic Exception is handled correctly.""" 52 | with ( 53 | patch( 54 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 55 | side_effect=RuntimeError("Something went wrong"), 56 | ), 57 | pytest.raises(RuntimeError), 58 | ): 59 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()): 60 | pass # This part should not be reached 61 | 62 | assert "An error occurred during test connection" in caplog.text 63 | -------------------------------------------------------------------------------- /agent_cli/core/transcription_logger.py: -------------------------------------------------------------------------------- 1 | """Transcription logging utilities for automatic server-side logging.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import logging 7 | from datetime import UTC, datetime 8 | from pathlib import Path 9 | from typing import Any 10 | 11 | 12 | class TranscriptionLogger: 13 | """Handles automatic logging of transcription results with timestamps.""" 14 | 15 | def __init__(self, log_file: Path | str | None = None) -> None: 16 | """Initialize the transcription logger. 17 | 18 | Args: 19 | log_file: Path to the log file. If None, uses default location. 
20 | 21 | """ 22 | if log_file is None: 23 | log_file = Path.cwd() / "transcription_log.json" 24 | elif isinstance(log_file, str): 25 | log_file = Path(log_file) 26 | 27 | self.log_file = log_file 28 | 29 | # Ensure the log directory exists 30 | self.log_file.parent.mkdir(parents=True, exist_ok=True) 31 | 32 | def log_transcription( 33 | self, 34 | *, 35 | raw: str, 36 | processed: str | None = None, 37 | ) -> None: 38 | """Log a transcription result. 39 | 40 | Args: 41 | raw: The raw transcript from ASR. 42 | processed: The processed transcript from LLM. 43 | 44 | """ 45 | log_entry: dict[str, Any] = { 46 | "timestamp": datetime.now(UTC).isoformat(), 47 | "raw": raw, 48 | "processed": processed, 49 | } 50 | 51 | # Write to log file as JSON Lines format 52 | try: 53 | with self.log_file.open("a", encoding="utf-8") as f: 54 | f.write(json.dumps(log_entry, ensure_ascii=False) + "\n") 55 | except OSError: 56 | # Use Python's logging module to log errors with the logger itself 57 | logger = logging.getLogger(__name__) 58 | logger.exception("Failed to write transcription log") 59 | 60 | 61 | # Default logger instance 62 | _default_logger: TranscriptionLogger | None = None 63 | 64 | 65 | def get_default_logger() -> TranscriptionLogger: 66 | """Get the default transcription logger instance.""" 67 | global _default_logger 68 | if _default_logger is None: 69 | _default_logger = TranscriptionLogger() 70 | return _default_logger 71 | -------------------------------------------------------------------------------- /tests/memory/test_indexer.py: -------------------------------------------------------------------------------- 1 | """Indexer and watcher tests for file-based memory.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from watchfiles import Change 8 | 9 | from agent_cli.memory import _files as mem_files 10 | from agent_cli.memory import _indexer 11 | 12 | 13 | class _FakeCollection: 14 | def __init__(self) -> None: 15 | 
self.upserts: list[tuple[list[str], list[str], list[dict[str, Any]]]] = [] 16 | self.deleted: list[list[str]] = [] 17 | 18 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 19 | self.upserts.append((ids, documents, metadatas)) 20 | 21 | def delete(self, ids: list[str]) -> None: 22 | self.deleted.append(ids) 23 | 24 | 25 | def test_initial_index_deletes_stale_and_indexes_current(tmp_path: Any) -> None: 26 | fake = _FakeCollection() 27 | idx = _indexer.MemoryIndex.from_snapshot(tmp_path / "memory_index.json") 28 | idx.entries["stale"] = mem_files.MemoryFileRecord( 29 | id="stale", 30 | path=tmp_path / "entries" / "default" / "stale.md", 31 | metadata=mem_files.MemoryMetadata(conversation_id="c", role="memory", created_at="now"), # type: ignore[attr-defined] 32 | content="old", 33 | ) 34 | 35 | rec = mem_files.write_memory_file( 36 | tmp_path, 37 | conversation_id="c", 38 | role="memory", 39 | created_at="now", 40 | content="fresh", 41 | ) 42 | 43 | _indexer.initial_index(fake, tmp_path, index=idx) 44 | 45 | assert fake.deleted == [["stale"]] 46 | assert fake.upserts # fresh file indexed 47 | assert rec.id in idx.entries 48 | 49 | 50 | def test_handle_change_add_modify_delete(tmp_path: Any) -> None: 51 | fake = _FakeCollection() 52 | idx = _indexer.MemoryIndex(snapshot_path=None) 53 | 54 | rec = mem_files.write_memory_file( 55 | tmp_path, 56 | conversation_id="c", 57 | role="memory", 58 | created_at="now", 59 | content="hello", 60 | ) 61 | 62 | _indexer._handle_change(Change.added, rec.path, fake, idx) 63 | assert fake.upserts 64 | assert rec.id in idx.entries 65 | 66 | _indexer._handle_change(Change.modified, rec.path, fake, idx) 67 | assert len(fake.upserts) >= 2 68 | 69 | _indexer._handle_change(Change.deleted, rec.path, fake, idx) 70 | assert fake.deleted 71 | assert rec.id not in idx.entries 72 | -------------------------------------------------------------------------------- 
/tests/agents/test_wake_word_assistant.py: -------------------------------------------------------------------------------- 1 | """Tests for the wake word assistant agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock, patch 6 | 7 | from typer.testing import CliRunner 8 | 9 | from agent_cli.cli import app 10 | 11 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 12 | 13 | 14 | def test_assistant_help(): 15 | """Test the assistant --help command.""" 16 | result = runner.invoke(app, ["assistant", "--help"], env={"NO_COLOR": "1", "TERM": "dumb"}) 17 | assert result.exit_code == 0 18 | assert "Usage: agent-cli assistant [OPTIONS]" in result.stdout 19 | 20 | 21 | @patch("agent_cli.agents.assistant.asyncio.run") 22 | def test_assistant_command(mock_asyncio_run: MagicMock): 23 | """Test the assistant command.""" 24 | result = runner.invoke(app, ["assistant"]) 25 | assert result.exit_code == 0 26 | mock_asyncio_run.assert_called_once() 27 | 28 | 29 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 30 | def test_assistant_stop(mock_stop_or_status_or_toggle: MagicMock): 31 | """Test the assistant --stop command.""" 32 | result = runner.invoke(app, ["assistant", "--stop"]) 33 | assert result.exit_code == 0 34 | mock_stop_or_status_or_toggle.assert_called_once_with( 35 | "assistant", 36 | "wake word assistant", 37 | True, 38 | False, 39 | False, 40 | quiet=False, 41 | ) 42 | 43 | 44 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 45 | def test_assistant_status(mock_stop_or_status_or_toggle: MagicMock): 46 | """Test the assistant --status command.""" 47 | result = runner.invoke(app, ["assistant", "--status"]) 48 | assert result.exit_code == 0 49 | mock_stop_or_status_or_toggle.assert_called_once_with( 50 | "assistant", 51 | "wake word assistant", 52 | False, 53 | True, 54 | False, 55 | quiet=False, 56 | ) 57 | 58 | 59 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 60 | def 
test_assistant_toggle(mock_stop_or_status_or_toggle: MagicMock): 61 | """Test the assistant --toggle command.""" 62 | result = runner.invoke(app, ["assistant", "--toggle"]) 63 | assert result.exit_code == 0 64 | mock_stop_or_status_or_toggle.assert_called_once_with( 65 | "assistant", 66 | "wake word assistant", 67 | False, 68 | False, 69 | True, 70 | quiet=False, 71 | ) 72 | -------------------------------------------------------------------------------- /tests/mocks/audio.py: -------------------------------------------------------------------------------- 1 | """Mock SoundDevice for testing audio functionality without real hardware.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Self 6 | 7 | import numpy as np 8 | 9 | 10 | class MockSoundDeviceStream: 11 | """Mock sounddevice stream for testing.""" 12 | 13 | def __init__(self, *args: Any, **kwargs: Any) -> None: 14 | """Initialize mock audio stream.""" 15 | self.args = args 16 | self.kwargs = kwargs 17 | self.is_input = kwargs.get("input", False) or isinstance(self, MockInputStream) 18 | self.is_output = kwargs.get("output", False) or isinstance(self, MockOutputStream) 19 | self.written_data: list[bytes] = [] 20 | self.active = False 21 | self._closed = False 22 | 23 | def start(self) -> None: 24 | """Start the mock stream.""" 25 | self.active = True 26 | 27 | def stop(self) -> None: 28 | """Stop the mock stream.""" 29 | self.active = False 30 | 31 | def close(self) -> None: 32 | """Close the mock stream.""" 33 | self._closed = True 34 | self.active = False 35 | 36 | def read(self, frames: int) -> tuple[np.ndarray, bool]: 37 | """Simulate reading from audio input device. 
38 | 39 | Returns: 40 | tuple: (data, overflow) 41 | 42 | """ 43 | dtype = self.kwargs.get("dtype", "int16") 44 | channels = self.kwargs.get("channels", 1) 45 | 46 | shape = (frames, channels) if channels > 1 else (frames,) 47 | 48 | if dtype == "int16": 49 | data = np.full(shape, 1, dtype=np.int16) 50 | else: 51 | data = np.zeros(shape, dtype=np.float32) 52 | 53 | return data, False 54 | 55 | def write(self, data: np.ndarray) -> None: 56 | """Simulate writing to audio output device.""" 57 | # data is numpy array 58 | self.written_data.append(data.tobytes()) 59 | 60 | def get_written_data(self) -> bytes: 61 | """Get all written data concatenated.""" 62 | return b"".join(self.written_data) 63 | 64 | def __enter__(self) -> Self: 65 | """Context manager entry.""" 66 | self.start() 67 | return self 68 | 69 | def __exit__(self, *args: object) -> None: 70 | """Context manager exit.""" 71 | self.close() 72 | 73 | 74 | class MockInputStream(MockSoundDeviceStream): 75 | """Mock input stream.""" 76 | 77 | 78 | class MockOutputStream(MockSoundDeviceStream): 79 | """Mock output stream.""" 80 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared test fixtures and configuration.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | import contextlib 7 | import io 8 | import logging 9 | 10 | import pytest 11 | from rich.console import Console 12 | 13 | 14 | def pytest_collection_modifyitems(items: list[pytest.Item]) -> None: 15 | """Set default timeout for all tests.""" 16 | for item in items: 17 | with contextlib.suppress(AttributeError): 18 | item.add_marker(pytest.mark.timeout(3)) 19 | 20 | 21 | @pytest.fixture 22 | def mock_console() -> Console: 23 | """Provide a console that writes to a StringIO for testing.""" 24 | return Console(file=io.StringIO(), width=80, force_terminal=True) 25 | 26 | 27 | @pytest.fixture 28 
@pytest.fixture
def mock_logger() -> logging.Logger:
    """Return a DEBUG-level logger dedicated to tests."""
    test_logger = logging.getLogger("test")
    test_logger.setLevel(logging.DEBUG)
    return test_logger


@pytest.fixture
def stop_event() -> asyncio.Event:
    """Event used to signal cancellation in async tests."""
    return asyncio.Event()


@pytest.fixture
def timeout_seconds() -> float:
    """Upper bound for async operations in tests."""
    return 5.0


@pytest.fixture
def mock_audio_device_info() -> list[dict]:
    """Fake sounddevice device list: one input, one output, one duplex device."""

    def _device(index: int, name: str, inputs: int, outputs: int) -> dict:
        # All fake devices share the standard 44.1 kHz sample rate.
        return {
            "index": index,
            "name": name,
            "max_input_channels": inputs,
            "max_output_channels": outputs,
            "default_samplerate": 44100.0,
        }

    return [
        _device(0, "Mock Input Device", 2, 0),
        _device(1, "Mock Output Device", 0, 2),
        _device(2, "Mock Combined Device", 2, 2),
    ]


@pytest.fixture
def llm_responses() -> dict[str, str]:
    """Canned LLM replies keyed by scenario."""
    return {
        "correct": "This text has been corrected and improved.",
        "hello": "Hello! How can I help you today?",
        "question": "The meaning of life is 42, according to The Hitchhiker's Guide to the Galaxy.",
        "default": "I understand your request and here is my response.",
    }
4 | 5 | ## Quick Start 6 | 7 | ```bash 8 | cd scripts/nvidia-asr-server 9 | uv run server.py 10 | ``` 11 | 12 | Server runs at `http://localhost:9898` 13 | 14 | ## CLI Options 15 | 16 | - `--model`, `-m`: Model to use (default: `canary-qwen-2.5b`) 17 | - `canary-qwen-2.5b`: Multilingual ASR (~5GB VRAM) 18 | - `parakeet-tdt-0.6b-v2`: English with timestamps (~2GB VRAM) 19 | - `--port`, `-p`: Port (default: 9898) 20 | - `--device`, `-d`: Device (default: auto-select best GPU) 21 | 22 | ```bash 23 | # Examples 24 | uv run server.py --model parakeet-tdt-0.6b-v2 25 | uv run server.py -m parakeet-tdt-0.6b-v2 -p 9090 -d cuda:1 26 | ``` 27 | 28 | ## Using with Agent-CLI 29 | 30 | ```bash 31 | # Start server 32 | cd scripts/nvidia-asr-server 33 | uv run server.py 34 | 35 | # In another terminal 36 | agent-cli transcribe \ 37 | --asr-provider openai \ 38 | --asr-openai-base-url http://localhost:9898/v1 39 | ``` 40 | 41 | **Note**: The `/v1` suffix is required for OpenAI compatibility. 42 | 43 | ## API Usage 44 | 45 | ### Python Example 46 | 47 | ```python 48 | import requests 49 | 50 | with open("audio.wav", "rb") as f: 51 | response = requests.post( 52 | "http://localhost:9898/v1/audio/transcriptions", 53 | files={"file": f}, 54 | data={"model": "parakeet-tdt-0.6b-v2"} 55 | ) 56 | 57 | print(response.json()["text"]) 58 | ``` 59 | 60 | ### With Timestamps (Parakeet only) 61 | 62 | ```python 63 | response = requests.post( 64 | "http://localhost:9898/v1/audio/transcriptions", 65 | files={"file": open("audio.wav", "rb")}, 66 | data={ 67 | "model": "parakeet-tdt-0.6b-v2", 68 | "timestamp_granularities": ["word"] 69 | } 70 | ) 71 | 72 | result = response.json() 73 | for word in result.get("words", []): 74 | print(f"{word['start']:.2f}s - {word['end']:.2f}s: {word['word']}") 75 | ``` 76 | 77 | ## Requirements 78 | 79 | - Python 3.13+ 80 | - CUDA-compatible GPU (recommended) 81 | - ~2-5GB VRAM depending on model 82 | 83 | ## Troubleshooting 84 | 85 | **GPU out of memory**: Try 
smaller model or CPU 86 | ```bash 87 | uv run server.py --model parakeet-tdt-0.6b-v2 88 | uv run server.py --device cpu 89 | ``` 90 | 91 | **Port in use**: Change port 92 | ```bash 93 | uv run server.py --port 9999 94 | ``` 95 | 96 | ## License 97 | 98 | - Canary: NVIDIA AI Foundation Models Community License 99 | - Parakeet: CC-BY-4.0 100 | -------------------------------------------------------------------------------- /agent_cli/core/sse.py: -------------------------------------------------------------------------------- 1 | """Shared SSE (Server-Sent Events) formatting helpers for OpenAI-compatible streaming.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import time 7 | from typing import Any 8 | 9 | 10 | def format_chunk( 11 | run_id: str, 12 | model: str, 13 | *, 14 | content: str | None = None, 15 | finish_reason: str | None = None, 16 | extra: dict[str, Any] | None = None, 17 | ) -> str: 18 | """Format a single SSE chunk in OpenAI chat.completion.chunk format. 19 | 20 | Args: 21 | run_id: Unique identifier for this completion. 22 | model: Model name to include in response. 23 | content: Text content delta (None for finish chunk). 24 | finish_reason: Reason for completion (e.g., "stop"). 25 | extra: Additional fields to include in the response. 26 | 27 | Returns: 28 | Formatted SSE data line. 29 | 30 | """ 31 | data: dict[str, Any] = { 32 | "id": f"chatcmpl-{run_id}", 33 | "object": "chat.completion.chunk", 34 | "created": int(time.time()), 35 | "model": model, 36 | "choices": [ 37 | { 38 | "index": 0, 39 | "delta": {"content": content} if content else {}, 40 | "finish_reason": finish_reason, 41 | }, 42 | ], 43 | } 44 | if extra: 45 | data.update(extra) 46 | return f"data: {json.dumps(data)}\n\n" 47 | 48 | 49 | def format_done() -> str: 50 | """Format the terminal [DONE] SSE message.""" 51 | return "data: [DONE]\n\n" 52 | 53 | 54 | def parse_chunk(line: str) -> dict[str, Any] | None: 55 | """Parse an SSE data line into a dict. 
56 | 57 | Args: 58 | line: Raw SSE line (e.g., "data: {...}"). 59 | 60 | Returns: 61 | Parsed JSON dict, or None if not parseable or [DONE]. 62 | 63 | """ 64 | if not line.startswith("data:"): 65 | return None 66 | payload = line[5:].strip() 67 | if payload == "[DONE]": 68 | return None 69 | try: 70 | return json.loads(payload) 71 | except json.JSONDecodeError: 72 | return None 73 | 74 | 75 | def extract_content_from_chunk(chunk: dict[str, Any]) -> str: 76 | """Extract text content from a parsed SSE chunk. 77 | 78 | Args: 79 | chunk: Parsed chunk dict from parse_chunk(). 80 | 81 | Returns: 82 | Content string, or empty string if not found. 83 | 84 | """ 85 | choices = chunk.get("choices") or [{}] 86 | delta = choices[0].get("delta") or {} 87 | return delta.get("content") or delta.get("text") or "" 88 | -------------------------------------------------------------------------------- /tests/rag/test_history.py: -------------------------------------------------------------------------------- 1 | """Test history preservation in RAG engine.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import AsyncMock, MagicMock, patch 5 | 6 | import pytest 7 | from pydantic_ai.messages import ModelRequest, ModelResponse 8 | 9 | from agent_cli.rag import engine 10 | from agent_cli.rag.models import ChatRequest, Message 11 | 12 | 13 | @pytest.mark.asyncio 14 | async def test_process_chat_request_preserves_history(tmp_path: Path) -> None: 15 | """Test that conversation history is correctly passed to the agent.""" 16 | mock_collection = MagicMock() 17 | mock_reranker = MagicMock() 18 | 19 | # Mock Agent Run 20 | mock_run_result = MagicMock() 21 | mock_run_result.output = "Response" 22 | mock_run_result.run_id = "test-id" 23 | mock_run_result.usage.return_value = None 24 | 25 | with ( 26 | patch("pydantic_ai.Agent.run", new_callable=AsyncMock) as mock_run, 27 | patch("agent_cli.rag.engine.search_context") as mock_search, 28 | ): 29 | mock_run.return_value = mock_run_result 30 
@pytest.mark.asyncio
async def test_process_chat_request_preserves_history(tmp_path: Path) -> None:
    """Conversation history must be forwarded to the agent unchanged."""
    mock_collection = MagicMock()
    mock_reranker = MagicMock()

    # Canned agent run result.
    mock_run_result = MagicMock()
    mock_run_result.output = "Response"
    mock_run_result.run_id = "test-id"
    mock_run_result.usage.return_value = None

    with (
        patch("pydantic_ai.Agent.run", new_callable=AsyncMock) as mock_run,
        patch("agent_cli.rag.engine.search_context") as mock_search,
    ):
        mock_run.return_value = mock_run_result
        mock_search.return_value = MagicMock(context="")  # no RAG context for this test

        # A multi-turn conversation ending with a fresh user question.
        conversation = [
            Message(role="system", content="System prompt"),
            Message(role="user", content="Question 1"),
            Message(role="assistant", content="Answer 1"),
            Message(role="user", content="Question 2"),
        ]
        request = ChatRequest(model="test", messages=conversation)

        await engine.process_chat_request(
            request,
            mock_collection,
            mock_reranker,
            "http://mock",
            docs_folder=tmp_path,
        )

        mock_run.assert_called_once()

        args, kwargs = mock_run.call_args
        # The latest user turn becomes the prompt...
        assert args[0] == "Question 2"

        # ...and everything before it is passed as message_history.
        history = kwargs["message_history"]
        assert len(history) == 3
        assert isinstance(history[0], ModelRequest)
        assert history[0].parts[0].content == "System prompt"
        assert isinstance(history[1], ModelRequest)
        assert history[1].parts[0].content == "Question 1"
        assert isinstance(history[2], ModelResponse)
        assert history[2].parts[0].content == "Answer 1"
import __version__ 10 | from .config import load_config, normalize_provider_defaults 11 | from .core.utils import console 12 | 13 | app = typer.Typer( 14 | name="agent-cli", 15 | help="A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.", 16 | add_completion=True, 17 | context_settings={"help_option_names": ["-h", "--help"]}, 18 | rich_markup_mode="markdown", 19 | ) 20 | 21 | 22 | def _version_callback(value: bool) -> None: 23 | if value: 24 | console.print(f"agent-cli {__version__}") 25 | raise typer.Exit 26 | 27 | 28 | @app.callback(invoke_without_command=True) 29 | def main( 30 | ctx: typer.Context, 31 | version: Annotated[ # noqa: ARG001 32 | bool, 33 | typer.Option( 34 | "-v", 35 | "--version", 36 | callback=_version_callback, 37 | is_eager=True, 38 | help="Show version and exit.", 39 | ), 40 | ] = False, 41 | ) -> None: 42 | """A suite of AI-powered tools.""" 43 | if ctx.invoked_subcommand is None: 44 | console.print("[bold red]No command specified.[/bold red]") 45 | console.print("[bold yellow]Running --help for your convenience.[/bold yellow]") 46 | console.print(ctx.get_help()) 47 | raise typer.Exit 48 | import dotenv # noqa: PLC0415 49 | 50 | dotenv.load_dotenv() 51 | print() 52 | 53 | 54 | def set_config_defaults(ctx: typer.Context, config_file: str | None) -> None: 55 | """Set the default values for the CLI based on the config file.""" 56 | config = load_config(config_file) 57 | wildcard_config = normalize_provider_defaults(config.get("defaults", {})) 58 | 59 | command_key = ctx.command.name or "" 60 | if not command_key: 61 | ctx.default_map = wildcard_config 62 | return 63 | 64 | # For nested subcommands (e.g., "memory proxy"), build "memory.proxy" 65 | if ctx.parent and ctx.parent.command.name and ctx.parent.command.name != "agent-cli": 66 | command_key = f"{ctx.parent.command.name}.{command_key}" 67 | 68 | command_config = normalize_provider_defaults(config.get(command_key, {})) 69 | 
ctx.default_map = {**wildcard_config, **command_config} 70 | 71 | 72 | # Import commands from other modules to register them 73 | from . import config_cmd # noqa: E402, F401 74 | from .agents import ( # noqa: E402, F401 75 | assistant, 76 | autocorrect, 77 | chat, 78 | memory, 79 | rag_proxy, 80 | server, 81 | speak, 82 | transcribe, 83 | voice_edit, 84 | ) 85 | from .install import hotkeys, services # noqa: E402, F401 86 | -------------------------------------------------------------------------------- /tests/agents/test_speak_e2e.py: -------------------------------------------------------------------------------- 1 | """End-to-end tests for the speak agent with simplified mocks.""" 2 | 3 | from __future__ import annotations 4 | 5 | from types import SimpleNamespace 6 | from unittest.mock import MagicMock, patch 7 | 8 | import pytest 9 | 10 | from agent_cli import config 11 | from agent_cli.agents.speak import _async_main 12 | from tests.mocks.audio import MockSoundDeviceStream 13 | from tests.mocks.wyoming import MockTTSClient 14 | 15 | 16 | @pytest.mark.asyncio 17 | @patch("agent_cli.services.tts.wyoming_client_context") 18 | @patch("agent_cli.agents.speak.setup_devices") 19 | @patch("agent_cli.services.tts.setup_output_stream") 20 | @patch("agent_cli.services.tts.open_audio_stream") 21 | async def test_speak_e2e( 22 | mock_open_audio_stream: MagicMock, 23 | mock_setup_output_stream: MagicMock, 24 | mock_setup_devices: MagicMock, 25 | mock_wyoming_client_context: MagicMock, 26 | ) -> None: 27 | """Test end-to-end speech synthesis with simplified mocks.""" 28 | # Setup mock stream 29 | mock_stream = MockSoundDeviceStream(output=True) 30 | mock_open_audio_stream.return_value.__enter__.return_value = mock_stream 31 | 32 | # Setup device info (input_index, input_name, output_index) 33 | mock_setup_devices.return_value = (None, None, 0) 34 | mock_setup_output_stream.return_value = SimpleNamespace(dtype="int16") 35 | 36 | # Setup mock Wyoming client 37 | 
@pytest.mark.asyncio
@patch("agent_cli.services.tts.wyoming_client_context")
@patch("agent_cli.agents.speak.setup_devices")
@patch("agent_cli.services.tts.setup_output_stream")
@patch("agent_cli.services.tts.open_audio_stream")
async def test_speak_e2e(
    mock_open_audio_stream: MagicMock,
    mock_setup_output_stream: MagicMock,
    mock_setup_devices: MagicMock,
    mock_wyoming_client_context: MagicMock,
) -> None:
    """Synthesize speech end-to-end against fully mocked audio and TTS backends."""
    # Fake output stream that records everything written to it.
    fake_stream = MockSoundDeviceStream(output=True)
    mock_open_audio_stream.return_value.__enter__.return_value = fake_stream

    # Device discovery returns (input_index, input_name, output_index).
    mock_setup_devices.return_value = (None, None, 0)
    mock_setup_output_stream.return_value = SimpleNamespace(dtype="int16")

    # Wyoming TTS client that yields canned audio bytes.
    fake_tts = MockTTSClient(b"fake audio data")
    mock_wyoming_client_context.return_value.__aenter__.return_value = fake_tts

    general_cfg = config.General(
        log_level="INFO",
        log_file=None,
        list_devices=False,
        quiet=False,
        clipboard=False,
        save_file=None,
    )
    provider_cfg = config.ProviderSelection(
        tts_provider="wyoming",
        asr_provider="wyoming",
        llm_provider="ollama",
    )
    audio_out_cfg = config.AudioOutput(enable_tts=True)
    wyoming_tts_cfg = config.WyomingTTS(
        tts_wyoming_ip="mock-host",
        tts_wyoming_port=10200,
    )
    openai_tts_cfg = config.OpenAITTS(tts_openai_model="tts-1", tts_openai_voice="alloy")
    kokoro_tts_cfg = config.KokoroTTS(
        tts_kokoro_model="tts-1",
        tts_kokoro_voice="alloy",
        tts_kokoro_host="http://localhost:8000/v1",
    )

    await _async_main(
        general_cfg=general_cfg,
        text="Hello, world!",
        provider_cfg=provider_cfg,
        audio_out_cfg=audio_out_cfg,
        wyoming_tts_cfg=wyoming_tts_cfg,
        openai_tts_cfg=openai_tts_cfg,
        kokoro_tts_cfg=kokoro_tts_cfg,
    )

    # One TTS round trip happened and audio actually reached the output stream.
    mock_wyoming_client_context.assert_called_once()
    assert fake_stream.get_written_data()
4 | 5 | ## Quick Platform Guide 6 | 7 | | Platform | Recommended Method | GPU Support | Performance | 8 | | ---------------- | ------------------------------ | ------------- | ----------- | 9 | | **macOS** | [Native Setup](macos.md) | ✅ Metal GPU | Best | 10 | | **Linux** | [Native Setup](linux.md) | ✅ NVIDIA GPU | Best | 11 | | **NixOS** | [System Integration](nixos.md) | ✅ NVIDIA GPU | Best | 12 | | **Any Platform** | [Docker Setup](docker.md) | ⚠️ Limited\* | Good | 13 | 14 | > **Note**: Docker on macOS does not support GPU acceleration. For best performance on Mac, use the native setup. 15 | 16 | ## Installation Methods 17 | 18 | ### 🍎 macOS Native (Recommended) 19 | 20 | **Best performance with Metal GPU acceleration** 21 | 22 | - Full GPU acceleration for Ollama 23 | - Optimized for Apple Silicon 24 | - Native macOS integrations 25 | 26 | 👉 [Follow macOS Setup Guide](macos.md) 27 | 28 | ### 🐧 Linux Native (Recommended) 29 | 30 | **Best performance with NVIDIA GPU acceleration** 31 | 32 | - NVIDIA GPU support 33 | - Full system integration 34 | - Optimal resource usage 35 | 36 | 👉 [Follow Linux Setup Guide](linux.md) 37 | 38 | ### ❄️ NixOS System Integration 39 | 40 | **Declarative system configuration with GPU support** 41 | 42 | - System-level service integration 43 | - Declarative configuration 44 | - Automatic service management 45 | 46 | 👉 [Follow NixOS Setup Guide](nixos.md) 47 | 48 | ### 🐳 Docker (Cross-platform) 49 | 50 | **Universal solution, some limitations** 51 | 52 | - Works on any platform 53 | - Consistent environment 54 | - ⚠️ No GPU acceleration on macOS 55 | - ⚠️ Limited GPU support on other platforms 56 | 57 | 👉 [Follow Docker Setup Guide](docker.md) 58 | 59 | ## What Gets Installed 60 | 61 | All installation methods set up these services: 62 | 63 | - **🧠 Ollama** - LLM server (gemma3:4b model) 64 | - **🎤 Wyoming Faster Whisper** - Speech-to-text 65 | - **🗣️ Wyoming Piper** - Text-to-speech 66 | - **👂 Wyoming OpenWakeWord** - Wake word 
detection 67 | 68 | ## Service Ports 69 | 70 | All methods use the same ports: 71 | 72 | - Ollama (LLM): `11434` 73 | - Whisper (ASR): `10300` 74 | - Piper (TTS): `10200` 75 | - OpenWakeWord: `10400` 76 | 77 | ## After Installation 78 | 79 | Once services are running, install the agent-cli package: 80 | 81 | ```bash 82 | # Using uv (recommended) 83 | uv tools install agent-cli 84 | 85 | # Using pip 86 | pip install agent-cli 87 | ``` 88 | 89 | Then test with: 90 | 91 | ```bash 92 | agent-cli autocorrect --help 93 | ``` 94 | 95 | ## Need Help? 96 | 97 | - Check the troubleshooting section in your chosen installation guide 98 | - Open an issue on [GitHub](https://github.com/basnijholt/agent-cli/issues) 99 | -------------------------------------------------------------------------------- /tests/core/test_sse.py: -------------------------------------------------------------------------------- 1 | """Tests for SSE formatting helpers.""" 2 | 3 | import json 4 | 5 | from agent_cli.core.sse import ( 6 | extract_content_from_chunk, 7 | format_chunk, 8 | format_done, 9 | parse_chunk, 10 | ) 11 | 12 | 13 | def test_format_chunk_with_content() -> None: 14 | """Test formatting a chunk with content.""" 15 | result = format_chunk("test-id", "gpt-4", content="Hello") 16 | assert result.startswith("data: ") 17 | assert result.endswith("\n\n") 18 | 19 | data = json.loads(result[6:]) 20 | assert data["id"] == "chatcmpl-test-id" 21 | assert data["object"] == "chat.completion.chunk" 22 | assert data["model"] == "gpt-4" 23 | assert data["choices"][0]["delta"]["content"] == "Hello" 24 | assert data["choices"][0]["finish_reason"] is None 25 | 26 | 27 | def test_format_chunk_finish() -> None: 28 | """Test formatting a finish chunk.""" 29 | result = format_chunk("test-id", "gpt-4", finish_reason="stop") 30 | data = json.loads(result[6:]) 31 | 32 | assert data["choices"][0]["delta"] == {} 33 | assert data["choices"][0]["finish_reason"] == "stop" 34 | 35 | 36 | def 
def test_format_chunk_with_extra() -> None:
    """Extra top-level fields are merged into the chunk payload."""
    extra = {"rag_sources": [{"path": "test.md"}]}
    payload = json.loads(format_chunk("test-id", "gpt-4", finish_reason="stop", extra=extra)[6:])
    assert payload["rag_sources"] == [{"path": "test.md"}]


def test_format_done() -> None:
    """The terminal SSE message is the literal [DONE] line."""
    assert format_done() == "data: [DONE]\n\n"


def test_parse_chunk_valid() -> None:
    """A well-formed data line parses into its JSON payload."""
    parsed = parse_chunk('data: {"choices": [{"delta": {"content": "Hi"}}]}')
    assert parsed is not None
    assert parsed["choices"][0]["delta"]["content"] == "Hi"


def test_parse_chunk_done() -> None:
    """The [DONE] sentinel parses to None."""
    assert parse_chunk("data: [DONE]") is None


def test_parse_chunk_invalid() -> None:
    """Non-data lines and malformed JSON both parse to None."""
    assert parse_chunk("not a data line") is None
    assert parse_chunk("data: {invalid json}") is None


def test_extract_content_from_chunk() -> None:
    """Content extraction handles content, empty deltas, and the text fallback."""
    assert extract_content_from_chunk({"choices": [{"delta": {"content": "Hello"}}]}) == "Hello"

    # Empty delta yields the empty string.
    chunk_empty: dict[str, list[dict[str, dict[str, str]]]] = {"choices": [{"delta": {}}]}
    assert extract_content_from_chunk(chunk_empty) == ""

    # Some providers use "text" instead of "content".
    assert extract_content_from_chunk({"choices": [{"delta": {"text": "World"}}]}) == "World"
from pydantic import BaseModel, ConfigDict, field_validator


class Message(BaseModel):
    """Chat message model."""

    # Sender role, e.g. "system" / "user" / "assistant" (not restricted here).
    role: str
    content: str


class ChatRequest(BaseModel):
    """Chat completion request model with long-term memory support."""

    # extra="allow" lets unknown OpenAI-compatible fields pass through the proxy.
    model_config = ConfigDict(extra="allow")

    model: str
    messages: list[Message]
    temperature: float | None = 0.7
    max_tokens: int | None = 1000
    stream: bool | None = False
    # Memory controls (all optional; None falls back to server defaults):
    memory_id: str | None = None  # conversation/namespace to read and write memories in
    memory_top_k: int | None = None  # max number of memories to retrieve
    memory_recency_weight: float | None = None  # how strongly newer memories are favored
    memory_score_threshold: float | None = None  # minimum relevance score to include


class MemoryEntry(BaseModel):
    """Stored memory entry."""

    role: str
    content: str
    created_at: str  # ISO-8601 timestamp string
    score: float | None = None  # retrieval relevance score, when available


class MemoryMetadata(BaseModel):
    """Metadata for a stored memory document."""

    conversation_id: str
    role: str
    created_at: str
    summary_kind: str | None = None  # set when the entry is an LLM summary
    replaced_by: str | None = None  # ID of the entry that superseded this one
    source_id: str | None = None  # ID of the original entry this was derived from


class SummaryOutput(BaseModel):
    """Structured summary returned by the LLM."""

    summary: str

    @field_validator("summary")
    @classmethod
    def _not_empty(cls, v: str) -> str:
        # Reject empty/whitespace-only summaries so the LLM retries instead of
        # silently storing a blank memory.
        if not v or not str(v).strip():
            msg = "field must be non-empty"
            raise ValueError(msg)
        return str(v).strip()


class StoredMemory(BaseModel):
    """Memory document as stored in the vector DB."""

    id: str
    content: str
    metadata: MemoryMetadata
    distance: float | None = None  # vector distance from the query, when retrieved
    embedding: list[float] | None = None  # raw embedding, when requested


class MemoryRetrieval(BaseModel):
    """Result of a memory retrieval operation."""

    entries: list[MemoryEntry]


class MemoryAdd(BaseModel):
    """Add a new memory fact."""

    event: Literal["ADD"] = "ADD"
    text: str
MemoryUpdate(BaseModel): 91 | """Update an existing memory fact.""" 92 | 93 | event: Literal["UPDATE"] = "UPDATE" 94 | id: int 95 | text: str 96 | 97 | 98 | class MemoryDelete(BaseModel): 99 | """Delete an existing memory fact.""" 100 | 101 | event: Literal["DELETE"] = "DELETE" 102 | id: int 103 | 104 | 105 | class MemoryIgnore(BaseModel): 106 | """Keep an existing memory as is.""" 107 | 108 | event: Literal["NONE"] = "NONE" 109 | id: int 110 | 111 | 112 | MemoryDecision = MemoryAdd | MemoryUpdate | MemoryDelete | MemoryIgnore 113 | -------------------------------------------------------------------------------- /scripts/setup-linux-hotkeys.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "⌨️ Setting up Linux hotkeys..." 6 | 7 | # Check if we're on Linux 8 | if [[ "$(uname)" != "Linux" ]]; then 9 | echo "❌ This script is for Linux only" 10 | exit 1 11 | fi 12 | 13 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 14 | 15 | TRANSCRIBE_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-transcription.sh" 16 | AUTOCORRECT_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-autocorrect.sh" 17 | VOICE_EDIT_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-voice-edit.sh" 18 | 19 | # Install notifications if missing 20 | echo "📢 Checking notifications..." 21 | if ! command -v notify-send &> /dev/null && ! command -v dunstify &> /dev/null; then 22 | echo "📦 Installing notification support..." 
23 | if command -v apt &> /dev/null; then 24 | sudo apt install -y libnotify-bin 25 | elif command -v dnf &> /dev/null; then 26 | sudo dnf install -y libnotify 27 | elif command -v pacman &> /dev/null; then 28 | sudo pacman -S --noconfirm libnotify 29 | elif command -v zypper &> /dev/null; then 30 | sudo zypper install -y libnotify-tools 31 | else 32 | echo "⚠️ Please install libnotify manually for your distribution" 33 | fi 34 | fi 35 | 36 | # Test notifications 37 | if command -v notify-send &> /dev/null; then 38 | notify-send "🎙️ Setup Complete" "Agent-CLI hotkeys ready!" || echo "⚠️ Notifications may not work in your environment" 39 | elif command -v dunstify &> /dev/null; then 40 | dunstify "🎙️ Setup Complete" "Agent-CLI hotkeys ready!" || echo "⚠️ Notifications may not work in your environment" 41 | fi 42 | 43 | echo "" 44 | echo "✅ Scripts ready! Add these hotkeys to your desktop environment:" 45 | echo "" 46 | echo "📋 Hotkey Bindings:" 47 | echo " Super+Shift+R → $TRANSCRIBE_SCRIPT" 48 | echo " Super+Shift+A → $AUTOCORRECT_SCRIPT" 49 | echo " Super+Shift+V → $VOICE_EDIT_SCRIPT" 50 | echo "" 51 | echo "🖥️ Configuration by Desktop Environment:" 52 | echo "" 53 | echo "Hyprland (~/.config/hypr/hyprland.conf):" 54 | echo " bind = SUPER SHIFT, R, exec, $TRANSCRIBE_SCRIPT" 55 | echo " bind = SUPER SHIFT, A, exec, $AUTOCORRECT_SCRIPT" 56 | echo " bind = SUPER SHIFT, V, exec, $VOICE_EDIT_SCRIPT" 57 | echo "" 58 | echo "Sway (~/.config/sway/config):" 59 | echo " bindsym \$mod+Shift+r exec $TRANSCRIBE_SCRIPT" 60 | echo " bindsym \$mod+Shift+a exec $AUTOCORRECT_SCRIPT" 61 | echo " bindsym \$mod+Shift+v exec $VOICE_EDIT_SCRIPT" 62 | echo "" 63 | echo "i3 (~/.config/i3/config):" 64 | echo " bindsym \$mod+Shift+r exec --no-startup-id $TRANSCRIBE_SCRIPT" 65 | echo " bindsym \$mod+Shift+a exec --no-startup-id $AUTOCORRECT_SCRIPT" 66 | echo " bindsym \$mod+Shift+v exec --no-startup-id $VOICE_EDIT_SCRIPT" 67 | echo "" 68 | echo "GNOME: Settings → Keyboard → View and 
def init_collection(
    persistence_path: Path,
    *,
    name: str,
    embedding_model: str = DEFAULT_OPENAI_EMBEDDING_MODEL,
    openai_base_url: str | None = None,
    openai_api_key: str | None = None,
    subdir: str | None = None,
) -> Collection:
    """Create or open a persistent Chroma collection with OpenAI-compatible embeddings."""
    base_path = persistence_path / subdir if subdir else persistence_path
    base_path.mkdir(parents=True, exist_ok=True)
    db_client = chromadb.PersistentClient(
        path=str(base_path),
        settings=Settings(anonymized_telemetry=False),
    )
    embedder = embedding_functions.OpenAIEmbeddingFunction(
        api_base=openai_base_url,
        api_key=openai_api_key or "dummy",  # the client rejects an empty key
        model_name=embedding_model,
    )
    return db_client.get_or_create_collection(name=name, embedding_function=embedder)
def flatten_metadatas(metadatas: Sequence[BaseModel]) -> list[dict[str, Any]]:
    """Serialize metadata models to JSON-safe dicts while preserving lists.

    ``mode="json"`` converts values (e.g. datetimes, enums) to JSON-compatible
    types, and ``exclude_none=True`` drops keys whose value is ``None`` so the
    store does not receive null metadata fields.
    """
    return [meta.model_dump(mode="json", exclude_none=True) for meta in metadatas]


def upsert(
    collection: Collection,
    *,
    ids: list[str],
    documents: list[str],
    metadatas: Sequence[BaseModel],
    batch_size: int = 10,
) -> None:
    """Upsert documents with JSON-serialized metadata.

    Args:
        collection: ChromaDB collection.
        ids: Document IDs.
        documents: Document contents.
        metadatas: Pydantic metadata models.
        batch_size: Max documents per embedding API call (default: 10).

    Raises:
        ValueError: If ``ids``, ``documents``, and ``metadatas`` have
            mismatched lengths, or if ``batch_size`` is not positive.

    """
    if not ids:
        return
    if not (len(ids) == len(documents) == len(metadatas)):
        msg = (
            f"Mismatched input lengths: {len(ids)} ids, "
            f"{len(documents)} documents, {len(metadatas)} metadatas."
        )
        raise ValueError(msg)
    if batch_size < 1:
        # A non-positive step would either raise an obscure range() error (0)
        # or silently skip every batch (negative), dropping all documents.
        msg = f"batch_size must be a positive integer, got {batch_size}."
        raise ValueError(msg)
    serialized = flatten_metadatas(metadatas)

    # Process in batches to avoid overwhelming the embedding service.
    for start in range(0, len(ids), batch_size):
        end = start + batch_size
        collection.upsert(
            ids=ids[start:end],
            documents=documents[start:end],
            metadatas=serialized[start:end],
        )


def delete(collection: Collection, ids: list[str]) -> None:
    """Delete documents by ID; a no-op for an empty list."""
    if ids:
        collection.delete(ids=ids)


def delete_where(collection: Collection, where: Mapping[str, Any]) -> None:
    """Delete all documents matching a Chroma ``where`` metadata filter."""
    collection.delete(where=where)
tests.""" 17 | 18 | def __init__(self) -> None: 19 | """Initialize in-memory store.""" 20 | self.docs: list[dict[str, Any]] = [] 21 | 22 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 23 | """Mimic Chroma upsert.""" 24 | for entry_id, doc, meta in zip(ids, documents, metadatas, strict=False): 25 | self.docs.append({"id": entry_id, "document": doc, "metadata": meta}) 26 | 27 | def get(self, where: dict[str, Any], include: list[str] | None = None) -> dict[str, Any]: 28 | """Mimic filtered get.""" 29 | _ = include 30 | 31 | def matches(entry: dict[str, Any]) -> bool: 32 | meta = entry["metadata"] 33 | 34 | def match_clause(clause: dict[str, Any]) -> bool: 35 | for key, value in clause.items(): 36 | if isinstance(value, dict) and "$ne" in value: 37 | if meta.get(key) == value["$ne"]: 38 | return False 39 | elif meta.get(key) != value: 40 | return False 41 | return True 42 | 43 | # Support simple dict or {"$and": [ ... ]} 44 | if "$and" in where: 45 | return all(match_clause(cl) for cl in where["$and"]) 46 | return match_clause(where) 47 | 48 | filtered = [entry for entry in self.docs if matches(entry)] 49 | return { 50 | "documents": [e["document"] for e in filtered], 51 | "metadatas": [e["metadata"] for e in filtered], 52 | "ids": [e["id"] for e in filtered], 53 | } 54 | 55 | def delete(self, ids: list[str]) -> None: 56 | """Mimic delete by IDs.""" 57 | self.docs = [entry for entry in self.docs if entry["id"] not in ids] 58 | 59 | 60 | def test_evict_if_needed_removes_oldest(tmp_path: Path) -> None: 61 | collection = FakeCollection() 62 | base_meta = {"conversation_id": "c1", "role": "memory"} 63 | collection.upsert( 64 | ids=["old", "mid", "new"], 65 | documents=["old doc", "mid doc", "new doc"], 66 | metadatas=[ 67 | {**base_meta, "created_at": "2024-01-01T00:00:00Z"}, 68 | {**base_meta, "created_at": "2024-06-01T00:00:00Z"}, 69 | {**base_meta, "created_at": "2024-12-01T00:00:00Z"}, 70 | ], 71 | ) 72 | 73 | with 
patch("agent_cli.memory._ingest.delete_memory_files"): 74 | evict_if_needed(collection, tmp_path, "c1", max_entries=2) 75 | 76 | remaining = list_conversation_entries(collection, "c1") 77 | remaining_ids = {e.id for e in remaining} 78 | assert remaining_ids == {"mid", "new"} 79 | -------------------------------------------------------------------------------- /tests/mocks/wyoming.py: -------------------------------------------------------------------------------- 1 | """Mock Wyoming servers and clients for testing.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Self 6 | 7 | from wyoming.asr import Transcript 8 | from wyoming.audio import AudioChunk, AudioStart, AudioStop 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import AsyncGenerator 12 | 13 | from wyoming.event import Event 14 | 15 | 16 | class MockWyomingClient: 17 | """Base class for mock Wyoming clients.""" 18 | 19 | def __init__(self) -> None: 20 | """Initialize mock client.""" 21 | self.events_written: list[Event] = [] 22 | self.is_active = True 23 | 24 | async def write_event(self, event: Event) -> None: 25 | """Mock writing an event.""" 26 | if self.is_active: 27 | self.events_written.append(event) 28 | 29 | async def read_event(self) -> Event | None: 30 | """Mock reading an event.""" 31 | raise NotImplementedError 32 | 33 | async def __aenter__(self) -> Self: 34 | """Async context manager entry.""" 35 | return self 36 | 37 | async def __aexit__(self, *args: object) -> None: 38 | """Async context manager exit.""" 39 | self.is_active = False 40 | 41 | 42 | class MockASRClient(MockWyomingClient): 43 | """Mock Wyoming ASR client for testing transcription.""" 44 | 45 | def __init__(self, transcript_text: str) -> None: 46 | """Initialize mock ASR client.""" 47 | super().__init__() 48 | self.transcript_text = transcript_text 49 | self._event_generator = self._generate_events() 50 | 51 | async def read_event(self) -> Event | None: 52 | """Mock reading events from 
class MockASRClient(MockWyomingClient):
    """Mock Wyoming ASR client for testing transcription."""

    def __init__(self, transcript_text: str) -> None:
        """Store the canned transcript and prime the event stream."""
        super().__init__()
        self.transcript_text = transcript_text
        self._event_generator = self._generate_events()

    async def read_event(self) -> Event | None:
        """Return the next canned server event, or ``None`` when exhausted."""
        try:
            return await anext(self._event_generator)
        except StopAsyncIteration:
            return None

    async def _generate_events(self) -> AsyncGenerator[Event, None]:
        """Yield a single transcript event."""
        yield Transcript(text=self.transcript_text).event()


class MockTTSClient(MockWyomingClient):
    """Mock Wyoming TTS client for testing speech synthesis."""

    def __init__(self, audio_data: bytes) -> None:
        """Store the canned audio payload and prime the event stream."""
        super().__init__()
        self.audio_data = audio_data
        self._event_generator = self._generate_events()

    async def read_event(self) -> Event | None:
        """Return the next canned server event, or ``None`` when exhausted."""
        try:
            return await anext(self._event_generator)
        except StopAsyncIteration:
            return None

    async def _generate_events(self) -> AsyncGenerator[Event, None]:
        """Yield a start/chunk/stop audio sequence (16-bit mono at 22.05 kHz)."""
        yield AudioStart(rate=22050, width=2, channels=1).event()
        yield AudioChunk(
            rate=22050,
            width=2,
            channels=1,
            audio=self.audio_data,
        ).event()
        yield AudioStop().event()

    async def connect(self) -> None:
        """Mock connect."""

    async def disconnect(self) -> None:
        """Mock disconnect."""
@app.command("install-services", rich_help_panel="Installation")
def install_services() -> None:
    """Install all required services (Ollama, Whisper, Piper, OpenWakeWord).

    This command installs:
    - Ollama (local LLM server)
    - Wyoming Faster Whisper (speech-to-text)
    - Wyoming Piper (text-to-speech)
    - Wyoming OpenWakeWord (wake word detection)

    The appropriate installation method is used based on your operating system.
    """
    # Pick the setup script matching the current OS, then delegate the heavy
    # lifting (execution, error reporting, next-steps hints) to the helper.
    execute_installation_script(
        script_name=get_platform_script("setup-macos.sh", "setup-linux.sh"),
        operation_name="Install services",
        success_message="Services installed successfully!",
        next_steps=[
            "Start services: agent-cli start-services",
            "Set up hotkeys: agent-cli install-hotkeys",
        ],
    )


@app.command("start-services", rich_help_panel="Service Management")
def start_services(
    attach: bool = typer.Option(
        True,  # noqa: FBT003
        "--attach/--no-attach",
        help="Attach to Zellij session after starting",
    ),
) -> None:
    """Start all agent-cli services in a Zellij session.

    This starts:
    - Ollama (LLM server)
    - Wyoming Faster Whisper (speech-to-text)
    - Wyoming Piper (text-to-speech)
    - Wyoming OpenWakeWord (wake word detection)

    Services run in a Zellij terminal multiplexer session named 'agent-cli'.
    Use Ctrl-Q to quit or Ctrl-O d to detach from the session.
    """
    try:
        script_path = get_script_path("start-all-services.sh")
    except FileNotFoundError as e:
        print_error_message("Service scripts not found")
        console.print(str(e))
        raise typer.Exit(1) from None

    # The launcher script honors this variable to skip attaching to Zellij.
    child_env = os.environ.copy()
    if not attach:
        child_env["AGENT_CLI_NO_ATTACH"] = "true"

    try:
        subprocess.run([str(script_path)], check=True, env=child_env)
    except subprocess.CalledProcessError as e:
        print_error_message(f"Failed to start services. Exit code: {e.returncode}")
        raise typer.Exit(e.returncode) from None

    if attach:
        # If we get here with attach=True, user likely detached
        print_with_style("\n👋 Detached from Zellij session.")
        print_with_style(
            "Services are still running. Use 'zellij attach agent-cli' to reattach.",
        )
    else:
        print_with_style("✅ Services started in background.", "green")
        print_with_style("Run 'zellij attach agent-cli' to view the session.", "yellow")
@patch("agent_cli.agents.transcribe.process.kill_process")
def test_transcribe_stop(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag."""
    mock_kill_process.return_value = True
    result = runner.invoke(app, ["transcribe", "--stop"])
    mock_kill_process.assert_called_once_with("transcribe")
    assert result.exit_code == 0
    assert "Transcribe stopped" in result.stdout


@patch("agent_cli.agents.transcribe.process.kill_process")
def test_transcribe_stop_not_running(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag when the process is not running."""
    mock_kill_process.return_value = False
    result = runner.invoke(app, ["transcribe", "--stop"])
    assert result.exit_code == 0
    assert "No transcribe is running" in result.stdout


@patch("agent_cli.agents.transcribe.process.is_process_running")
def test_transcribe_status_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is running."""
    mock_is_process_running.return_value = True
    # A PID must be readable for the status report to show a running process.
    with patch("agent_cli.agents.transcribe.process.read_pid_file", return_value=123):
        result = runner.invoke(app, ["transcribe", "--status"])
    assert result.exit_code == 0
    assert "Transcribe is running" in result.stdout


@patch("agent_cli.agents.transcribe.process.is_process_running")
def test_transcribe_status_not_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is not running."""
    mock_is_process_running.return_value = False
    result = runner.invoke(app, ["transcribe", "--status"])
    assert result.exit_code == 0
    assert "Transcribe is not running" in result.stdout
def get_script_path(script_name: str) -> Path:
    """Get the path to a script in the scripts directory."""
    return _script_directory() / script_name


def _run_script(script_path: Path) -> CompletedProcess[bytes]:
    """Run a shell script, streaming its output directly to the terminal."""
    if not script_path.exists():
        msg = f"Script not found: {script_path}"
        raise FileNotFoundError(msg)

    # Run the script through the shell, which handles execution permissions
    # This avoids modifying file permissions in the package directory
    return subprocess.run(
        ["bash", str(script_path)],  # noqa: S607
        check=True,
        cwd=script_path.parent,
    )


def get_platform_script(macos_script: str, linux_script: str) -> str:
    """Get the appropriate script name based on the platform."""
    per_platform = {"darwin": macos_script, "linux": linux_script}
    system = platform.system().lower()
    try:
        return per_platform[system]
    except KeyError:
        print_error_message(f"Unsupported operating system: {system}")
        raise typer.Exit(1) from None


def execute_installation_script(
    script_name: str,
    operation_name: str,
    success_message: str,
    next_steps: list[str] | None = None,
) -> None:
    """Execute an installation script with standard error handling."""
    script_path = get_script_path(script_name)
    print_with_style(f"🚀 Running {script_name} to {operation_name.lower()}...", "green")

    try:
        _run_script(script_path)
    except FileNotFoundError as e:
        # This case is for when the script file itself is not found
        print_error_message(f"{operation_name} failed: {e}")
        raise typer.Exit(1) from None
    except subprocess.CalledProcessError as e:
        # This case handles non-zero exit codes from the script
        print_error_message(f"{operation_name} failed with exit code {e.returncode}")
        raise typer.Exit(e.returncode) from None

    print_with_style(f"✅ {success_message}", "green")
    if next_steps:
        print_with_style("\nNext steps:", "yellow")
        for i, step in enumerate(next_steps, 1):
            print_with_style(f" {i}. {step}", "cyan")
@patch("agent_cli.agents.voice_edit.process.kill_process")
def test_voice_edit_stop(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag."""
    mock_kill_process.return_value = True
    result = runner.invoke(app, ["voice-edit", "--stop"])
    mock_kill_process.assert_called_once_with("voice-edit")
    assert result.exit_code == 0
    assert "Voice assistant stopped" in result.stdout


@patch("agent_cli.agents.voice_edit.process.kill_process")
def test_voice_edit_stop_not_running(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag when the process is not running."""
    mock_kill_process.return_value = False
    result = runner.invoke(app, ["voice-edit", "--stop"])
    assert result.exit_code == 0
    assert "No voice assistant is running" in result.stdout


@patch("agent_cli.agents.voice_edit.process.is_process_running")
def test_voice_edit_status_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is running."""
    mock_is_process_running.return_value = True
    # A readable PID file is required for the "running" status report.
    with patch(
        "agent_cli.agents.voice_edit.process.read_pid_file",
        return_value=123,
    ):
        result = runner.invoke(app, ["voice-edit", "--status"])
    assert result.exit_code == 0
    assert "Voice assistant is running" in result.stdout


@patch("agent_cli.agents.voice_edit.process.is_process_running")
def test_voice_edit_status_not_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is not running."""
    mock_is_process_running.return_value = False
    result = runner.invoke(app, ["voice-edit", "--status"])
    assert result.exit_code == 0
    assert "Voice assistant is not running" in result.stdout
def _get_openai_client(api_key: str | None, base_url: str | None = None) -> AsyncOpenAI:
    """Get an OpenAI client instance.

    For custom endpoints (base_url is set), API key is optional and a dummy value
    is used if not provided, since custom endpoints may not require authentication.
    """
    from openai import AsyncOpenAI  # noqa: PLC0415

    # Fall back to a placeholder key so key-less custom endpoints still work.
    return AsyncOpenAI(api_key=api_key or "dummy-api-key", base_url=base_url)
async def transcribe_audio_openai(
    audio_data: bytes,
    openai_asr_cfg: config.OpenAIASR,
    logger: logging.Logger,
    **_kwargs: object,  # Accept extra kwargs for consistency with Wyoming
) -> str:
    """Transcribe audio using OpenAI's Whisper API or a compatible endpoint.

    When openai_base_url is set, uses the custom endpoint instead of the official OpenAI API.
    This allows using self-hosted Whisper models or other compatible services.
    """
    base_url = openai_asr_cfg.openai_base_url
    if base_url:
        logger.info(
            "Transcribing audio with custom OpenAI-compatible endpoint: %s",
            base_url,
        )
    else:
        logger.info("Transcribing audio with OpenAI Whisper...")
        # Only the official API strictly requires a key; custom endpoints may not.
        if not openai_asr_cfg.openai_api_key:
            msg = "OpenAI API key is not set."
            raise ValueError(msg)

    api_client = _get_openai_client(
        api_key=openai_asr_cfg.openai_api_key,
        base_url=base_url,
    )
    # The API infers the container format from the file name on the buffer.
    wav_buffer = io.BytesIO(audio_data)
    wav_buffer.name = "audio.wav"

    request_kwargs = {"model": openai_asr_cfg.asr_openai_model, "file": wav_buffer}
    if openai_asr_cfg.asr_openai_prompt:
        request_kwargs["prompt"] = openai_asr_cfg.asr_openai_prompt

    response = await api_client.audio.transcriptions.create(**request_kwargs)
    return response.text


async def synthesize_speech_openai(
    text: str,
    openai_tts_cfg: config.OpenAITTS,
    logger: logging.Logger,
) -> bytes:
    """Synthesize speech using OpenAI's TTS API or a compatible endpoint."""
    base_url = openai_tts_cfg.tts_openai_base_url
    if base_url:
        logger.info(
            "Synthesizing speech with custom OpenAI-compatible endpoint: %s",
            base_url,
        )
    else:
        logger.info("Synthesizing speech with OpenAI TTS...")
        # Only the official API strictly requires a key; custom endpoints may not.
        if not openai_tts_cfg.openai_api_key:
            msg = "OpenAI API key is not set."
            raise ValueError(msg)

    api_client = _get_openai_client(
        api_key=openai_tts_cfg.openai_api_key,
        base_url=base_url,
    )
    response = await api_client.audio.speech.create(
        model=openai_tts_cfg.tts_openai_model,
        voice=openai_tts_cfg.tts_openai_voice,
        input=text,
        response_format="wav",
    )
    return response.content
def test_convert_audio_arguments() -> None:
    """Regression test: Ensure subprocess.run is called with text=False."""
    with (
        patch("shutil.which", return_value="/usr/bin/ffmpeg"),
        patch("subprocess.run") as run_mock,
        patch("pathlib.Path.read_bytes", return_value=b"converted_data"),
        patch("pathlib.Path.unlink"),  # Prevent file deletion issues on Windows
    ):
        # Simulate a successful ffmpeg invocation.
        run_mock.return_value = MagicMock(returncode=0, stdout=b"", stderr=b"")

        audio_format.convert_audio_to_wyoming_format(b"input_data", "test.mp3")

    # Check that subprocess.run was called
    assert run_mock.called
    call = run_mock.call_args

    # CRITICAL: Verify text=False (or not present/True) was passed
    # The fix explicitly sets text=False
    assert call.kwargs.get("text") is False, "subprocess.run must be called with text=False"

    # Verify command structure
    cmd = call.args[0]
    assert cmd[0] == "ffmpeg"
    assert "-f" in cmd
    assert "s16le" in cmd
sample) 93 | # Check if output seems reasonable (non-empty) 94 | assert len(converted) > 0 95 | 96 | # Ideally check if it's multiple of 2 (16-bit) 97 | assert len(converted) % 2 == 0 98 | 99 | except RuntimeError as e: 100 | pytest.fail(f"Conversion failed: {e}") 101 | -------------------------------------------------------------------------------- /docs/installation/docker.md: -------------------------------------------------------------------------------- 1 | # Docker Installation 2 | 3 | Universal Docker setup that works on any platform with Docker support. 4 | 5 | > **⚠️ Important Limitations** 6 | > 7 | > - **macOS**: Docker does not support GPU acceleration. For 10x better performance, use [macOS native setup](macos.md) 8 | > - **Linux**: Limited GPU support. For full NVIDIA GPU acceleration, use [Linux native setup](linux.md) 9 | > - **Ollama on macOS**: Can be memory-intensive without GPU acceleration 10 | 11 | ## Prerequisites 12 | 13 | - Docker and Docker Compose installed 14 | - At least 8GB RAM available for Docker 15 | - 10GB free disk space 16 | 17 | ## Quick Start 18 | 19 | 1. **Start the services:** 20 | 21 | ```bash 22 | docker compose -f docker/docker-compose.yml up --build 23 | ``` 24 | 25 | 2. **Check if services are running:** 26 | 27 | ```bash 28 | docker compose -f docker/docker-compose.yml logs 29 | ``` 30 | 31 | You should see logs from all services, with Ollama downloading the `gemma3:4b` model. 32 | 33 | 3. **Install agent-cli:** 34 | 35 | ```bash 36 | uv tools install agent-cli 37 | # or: pip install agent-cli 38 | ``` 39 | 40 | 4. 
**Test the setup:** 41 | ```bash 42 | agent-cli autocorrect "this has an eror" 43 | ``` 44 | 45 | ## Services Overview 46 | 47 | The Docker setup provides: 48 | 49 | | Service | Image | Port | Purpose | 50 | | ---------------- | ---------------------------- | ----- | -------------------------- | 51 | | **ollama** | Custom build | 11434 | LLM server with gemma3:4b | 52 | | **whisper** | rhasspy/wyoming-whisper | 10300 | Speech-to-text (large-v3) | 53 | | **piper** | rhasspy/wyoming-piper | 10200 | Text-to-speech (ryan-high) | 54 | | **openwakeword** | rhasspy/wyoming-openwakeword | 10400 | Wake word detection | 55 | 56 | ## Configuration Files 57 | 58 | The Docker setup uses: 59 | 60 | - `docker/docker-compose.yml` - Service orchestration 61 | - `docker/Dockerfile` - Custom Ollama container 62 | - Data volumes for model persistence 63 | 64 | ## Important Limitations 65 | 66 | - **macOS**: No GPU acceleration (10x slower than native) 67 | - **Linux**: Limited GPU support 68 | - **Memory**: Requires 8GB+ RAM for smooth operation 69 | 70 | ## Managing Services 71 | 72 | ```bash 73 | # Start services in background 74 | docker compose -f docker/docker-compose.yml up -d 75 | 76 | # Stop services 77 | docker compose -f docker/docker-compose.yml down 78 | 79 | # View logs 80 | docker compose -f docker/docker-compose.yml logs -f 81 | 82 | # Restart a specific service 83 | docker compose -f docker/docker-compose.yml restart ollama 84 | ``` 85 | 86 | ## Data Persistence 87 | 88 | Services store data in local directories: 89 | 90 | - `./ollama/` - Ollama models and config 91 | - `./whisper-data/` - Whisper models 92 | - `./piper-data/` - Piper voice models 93 | - `./openwakeword-data/` - Wake word models 94 | 95 | ## Troubleshooting 96 | 97 | ### Common Issues 98 | 99 | - **Slow performance**: Use native setup for better performance 100 | - **Memory issues**: Increase Docker memory allocation to 8GB+ 101 | - **Port conflicts**: Change port mappings in 
`docker/docker-compose.yml` 102 | 103 | ## Alternative: Native Installation 104 | 105 | For better performance, consider platform-specific native installation: 106 | 107 | - [macOS Native Setup](macos.md) - Metal GPU acceleration 108 | - [Linux Native Setup](linux.md) - NVIDIA GPU acceleration 109 | -------------------------------------------------------------------------------- /tests/agents/test_transcribe_e2e.py: -------------------------------------------------------------------------------- 1 | """End-to-end tests for the transcribe agent with minimal mocking.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | from typing import TYPE_CHECKING 7 | from unittest.mock import MagicMock, patch 8 | 9 | import pytest 10 | 11 | from agent_cli import config 12 | from agent_cli.agents.transcribe import _async_main 13 | from tests.mocks.audio import MockSoundDeviceStream 14 | from tests.mocks.wyoming import MockASRClient 15 | 16 | if TYPE_CHECKING: 17 | from rich.console import Console 18 | 19 | 20 | @pytest.mark.asyncio 21 | @patch("agent_cli.agents.transcribe.signal_handling_context") 22 | @patch("agent_cli.services.asr.wyoming_client_context") 23 | @patch("agent_cli.services.asr.open_audio_stream") 24 | @patch("agent_cli.services.asr.setup_input_stream") 25 | async def test_transcribe_e2e( 26 | mock_setup_input_stream: MagicMock, 27 | mock_open_audio_stream: MagicMock, 28 | mock_wyoming_client_context: MagicMock, 29 | mock_signal_handling_context: MagicMock, 30 | mock_console: Console, 31 | ) -> None: 32 | """Test end-to-end transcription with simplified mocks.""" 33 | # Setup mock stream 34 | mock_stream = MockSoundDeviceStream(input=True) 35 | mock_open_audio_stream.return_value.__enter__.return_value = mock_stream 36 | mock_setup_input_stream.return_value = {"dtype": "int16"} 37 | 38 | # Setup mock Wyoming client 39 | transcript_text = "This is a test transcription." 
40 | mock_asr_client = MockASRClient(transcript_text) 41 | mock_wyoming_client_context.return_value.__aenter__.return_value = mock_asr_client 42 | 43 | # Setup stop event 44 | stop_event = asyncio.Event() 45 | mock_signal_handling_context.return_value.__enter__.return_value = stop_event 46 | asyncio.get_event_loop().call_later(0.1, stop_event.set) 47 | 48 | provider_cfg = config.ProviderSelection( 49 | asr_provider="wyoming", 50 | llm_provider="ollama", 51 | tts_provider="wyoming", 52 | ) 53 | general_cfg = config.General( 54 | log_level="INFO", 55 | log_file=None, 56 | quiet=False, 57 | list_devices=False, 58 | clipboard=False, 59 | ) 60 | audio_in_cfg = config.AudioInput(input_device_index=0) 61 | wyoming_asr_cfg = config.WyomingASR(asr_wyoming_ip="mock-host", asr_wyoming_port=10300) 62 | openai_asr_cfg = config.OpenAIASR(asr_openai_model="whisper-1") 63 | ollama_cfg = config.Ollama(llm_ollama_model="", llm_ollama_host="") 64 | openai_llm_cfg = config.OpenAILLM(llm_openai_model="", openai_base_url=None) 65 | gemini_llm_cfg = config.GeminiLLM( 66 | llm_gemini_model="gemini-1.5-flash", 67 | gemini_api_key="test-key", 68 | ) 69 | 70 | with patch("agent_cli.core.utils.console", mock_console): 71 | await _async_main( 72 | extra_instructions=None, 73 | provider_cfg=provider_cfg, 74 | general_cfg=general_cfg, 75 | audio_in_cfg=audio_in_cfg, 76 | wyoming_asr_cfg=wyoming_asr_cfg, 77 | openai_asr_cfg=openai_asr_cfg, 78 | ollama_cfg=ollama_cfg, 79 | openai_llm_cfg=openai_llm_cfg, 80 | gemini_llm_cfg=gemini_llm_cfg, 81 | llm_enabled=False, 82 | transcription_log=None, 83 | save_recording=False, 84 | ) 85 | 86 | # Assert that the final transcript is in the console output 87 | output = mock_console.file.getvalue() 88 | assert transcript_text in output 89 | 90 | # Ensure the mock client was used 91 | mock_wyoming_client_context.assert_called_once() 92 | -------------------------------------------------------------------------------- /tests/rag/test_rag_proxy_passthrough.py: 
-------------------------------------------------------------------------------- 1 | """Tests for the RAG proxy passthrough functionality.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import AsyncMock, Mock 7 | 8 | import pytest 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.rag.api import create_app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | from pytest_mock import MockerFixture 17 | 18 | 19 | @pytest.fixture 20 | def mock_rag_dependencies(mocker: MockerFixture) -> None: 21 | """Mock the RAG dependencies to avoid side effects.""" 22 | mocker.patch("agent_cli.rag.api.init_collection") 23 | mocker.patch("agent_cli.rag.api.get_reranker_model") 24 | mocker.patch("agent_cli.rag.api.load_hashes_from_metadata", return_value=({}, {})) 25 | mocker.patch("agent_cli.rag.api.watch_docs") 26 | mocker.patch("agent_cli.rag.api.initial_index") 27 | # Also mock threading to prevent background threads 28 | mocker.patch("threading.Thread") 29 | 30 | 31 | @pytest.fixture 32 | def app(tmp_path: Path, mock_rag_dependencies: None) -> TestClient: # noqa: ARG001 33 | """Create the FastAPI app with mocked dependencies.""" 34 | fastapi_app = create_app( 35 | docs_folder=tmp_path / "docs", 36 | chroma_path=tmp_path / "chroma", 37 | openai_base_url="http://upstream.test/v1", 38 | chat_api_key="dummy-rag-key", 39 | ) 40 | return TestClient(fastapi_app) 41 | 42 | 43 | def test_rag_proxy_passthrough_models(app: TestClient, mocker: MockerFixture) -> None: 44 | """Test that /v1/models is forwarded to the upstream.""" 45 | mock_send = AsyncMock() 46 | mock_response = Mock() 47 | mock_response.status_code = 200 48 | mock_response.content = b'{"data": [{"id": "gpt-4"}]}' 49 | mock_response.headers = {"Content-Type": "application/json"} 50 | mock_send.return_value = mock_response 51 | 52 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 53 | 54 | response = 
app.get("/v1/models") 55 | 56 | assert response.status_code == 200 57 | assert response.json() == {"data": [{"id": "gpt-4"}]} 58 | 59 | assert mock_send.call_count == 1 60 | request_obj = mock_send.call_args[0][0] 61 | 62 | assert str(request_obj.url) == "http://upstream.test/v1/models" 63 | assert request_obj.method == "GET" 64 | # Ensure correct Auth header from RAG config 65 | assert request_obj.headers["Authorization"] == "Bearer dummy-rag-key" 66 | 67 | 68 | def test_rag_proxy_passthrough_catchall(app: TestClient, mocker: MockerFixture) -> None: 69 | """Test that an arbitrary path is forwarded.""" 70 | mock_send = AsyncMock() 71 | mock_response = Mock() 72 | mock_response.status_code = 200 73 | mock_response.content = b"OK" 74 | mock_response.headers = {"Content-Type": "text/plain"} 75 | mock_send.return_value = mock_response 76 | 77 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 78 | 79 | response = app.post("/custom/endpoint", content=b"data") 80 | 81 | assert response.status_code == 200 82 | assert response.content == b"OK" 83 | 84 | assert mock_send.call_count == 1 85 | request_obj = mock_send.call_args[0][0] 86 | assert str(request_obj.url) == "http://upstream.test/v1/custom/endpoint" 87 | 88 | 89 | def test_rag_proxy_passthrough_upstream_error(app: TestClient, mocker: MockerFixture) -> None: 90 | """Test handling of upstream errors.""" 91 | mock_send = AsyncMock() 92 | mock_send.side_effect = Exception("Network error") 93 | 94 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 95 | 96 | response = app.get("/v1/models") 97 | 98 | assert response.status_code == 502 99 | assert response.content == b"Upstream Proxy Error" 100 | -------------------------------------------------------------------------------- /tests/rag/test_indexer.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG indexer.""" 2 | 3 | from collections.abc import AsyncGenerator 4 | from pathlib import Path 5 | 
from typing import Any 6 | from unittest.mock import MagicMock, patch 7 | 8 | import pytest 9 | from watchfiles import Change 10 | 11 | from agent_cli.rag import _indexer 12 | from agent_cli.rag._utils import should_ignore_path 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_watch_docs(tmp_path: Path) -> None: 17 | """Test watching docs folder.""" 18 | mock_collection = MagicMock() 19 | docs_folder = tmp_path / "docs" 20 | docs_folder.mkdir() 21 | file_hashes: dict[str, str] = {} 22 | file_mtimes: dict[str, float] = {} 23 | 24 | # Create dummy files so is_file() returns True 25 | (docs_folder / "new.txt").touch() 26 | (docs_folder / "mod.txt").touch() 27 | # del.txt doesn't need to exist 28 | 29 | # Mock awatch to yield changes 30 | changes = { 31 | (Change.added, str(docs_folder / "new.txt")), 32 | (Change.modified, str(docs_folder / "mod.txt")), 33 | (Change.deleted, str(docs_folder / "del.txt")), 34 | } 35 | 36 | async def mock_awatch_gen( 37 | *_args: Any, 38 | **_kwargs: Any, 39 | ) -> AsyncGenerator[set[tuple[Change, str]], None]: 40 | yield changes 41 | 42 | async def fake_watch_directory(_root: Path, handler: Any, **_kwargs) -> None: # type: ignore[no-untyped-def] 43 | for change, path in changes: 44 | handler(change, Path(path)) 45 | 46 | with ( 47 | patch("agent_cli.rag._indexer.watch_directory", side_effect=fake_watch_directory), 48 | patch("agent_cli.rag._indexer.index_file") as mock_index, 49 | patch("agent_cli.rag._indexer.remove_file") as mock_remove, 50 | ): 51 | await _indexer.watch_docs(mock_collection, docs_folder, file_hashes, file_mtimes) 52 | 53 | # Check calls 54 | assert mock_index.call_count == 2 # added and modified 55 | assert mock_remove.call_count == 1 # deleted 56 | 57 | 58 | @pytest.mark.asyncio 59 | async def test_watch_docs_passes_ignore_filter(tmp_path: Path) -> None: 60 | """Test that watch_docs passes the should_ignore_path filter to watch_directory.""" 61 | mock_collection = MagicMock() 62 | docs_folder = tmp_path / 
"docs" 63 | docs_folder.mkdir() 64 | file_hashes: dict[str, str] = {} 65 | file_mtimes: dict[str, float] = {} 66 | 67 | async def fake_watch_directory( 68 | _root: Path, 69 | _handler: Any, 70 | *, 71 | ignore_filter: Any = None, 72 | **_kwargs: Any, 73 | ) -> None: 74 | # Verify ignore_filter is provided and is the should_ignore_path function 75 | assert ignore_filter is not None 76 | assert ignore_filter.__name__ == "should_ignore_path" 77 | 78 | with patch( 79 | "agent_cli.rag._indexer.watch_directory", 80 | side_effect=fake_watch_directory, 81 | ): 82 | await _indexer.watch_docs(mock_collection, docs_folder, file_hashes, file_mtimes) 83 | 84 | 85 | @pytest.mark.asyncio 86 | async def test_watch_docs_ignore_filter_works(tmp_path: Path) -> None: 87 | """Test that the ignore filter correctly filters out ignored paths.""" 88 | docs_folder = tmp_path / "docs" 89 | docs_folder.mkdir() 90 | 91 | # Test that the filter correctly identifies paths to ignore 92 | git_file = docs_folder / ".git" / "config" 93 | venv_file = docs_folder / "venv" / "bin" / "python" 94 | pycache_file = docs_folder / "__pycache__" / "module.pyc" 95 | normal_file = docs_folder / "readme.md" 96 | 97 | assert should_ignore_path(git_file, docs_folder) 98 | assert should_ignore_path(venv_file, docs_folder) 99 | assert should_ignore_path(pycache_file, docs_folder) 100 | assert not should_ignore_path(normal_file, docs_folder) 101 | -------------------------------------------------------------------------------- /scripts/start-all-services.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check if zellij is installed 4 | if ! command -v zellij &> /dev/null; then 5 | echo "📺 Zellij not found. Installing..." 
6 | uvx dotbins get zellij-org/zellij 7 | export PATH="$HOME/.local/bin:$PATH" 8 | fi 9 | 10 | # Get the current directory 11 | SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd)" 12 | 13 | 14 | # Create .runtime directory and Zellij layout file 15 | mkdir -p "$SCRIPTS_DIR/.runtime" 16 | cat > "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" << EOF 17 | session_name "agent-cli" 18 | 19 | layout { 20 | pane split_direction="vertical" { 21 | pane split_direction="horizontal" { 22 | pane { 23 | name "Ollama" 24 | command "ollama" 25 | args "serve" 26 | } 27 | pane { 28 | name "Help" 29 | command "sh" 30 | args "-c" "cat $SCRIPTS_DIR/zellij_help.txt | less" 31 | } 32 | } 33 | pane split_direction="horizontal" { 34 | pane { 35 | name "Whisper" 36 | cwd "$SCRIPTS_DIR" 37 | command "./run-whisper.sh" 38 | } 39 | pane split_direction="horizontal" { 40 | pane { 41 | name "Piper" 42 | cwd "$SCRIPTS_DIR" 43 | command "./run-piper.sh" 44 | } 45 | pane { 46 | name "OpenWakeWord" 47 | cwd "$SCRIPTS_DIR" 48 | command "./run-openwakeword.sh" 49 | } 50 | } 51 | } 52 | } 53 | 54 | floating_panes { 55 | pane { 56 | name "Help" 57 | x "10%" 58 | y "10%" 59 | width "80%" 60 | height "80%" 61 | command "sh" 62 | close_on_exit true 63 | args "-c" "cat $SCRIPTS_DIR/zellij_help.txt | less" 64 | } 65 | } 66 | } 67 | EOF 68 | 69 | # Function to show common usage instructions 70 | show_usage() { 71 | echo "❌ Use 'Ctrl-Q' to quit Zellij" 72 | echo "🔌 Use 'Ctrl-O d' to detach from the session" 73 | echo "🔗 Use 'zellij attach agent-cli' to reattach" 74 | } 75 | 76 | # Function to start a new Zellij session 77 | start_new_session() { 78 | if [ "$AGENT_CLI_NO_ATTACH" = "true" ]; then 79 | # Start detached 80 | zellij --session agent-cli --layout "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" & 81 | sleep 1 # Give it a moment to start 82 | echo "✅ Session 'agent-cli' started in background. Use 'zellij attach agent-cli' to view." 
83 | else 84 | show_usage 85 | # Start zellij with layout file - session name is specified in the layout 86 | zellij --layout "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" 87 | fi 88 | } 89 | 90 | # Check if agent-cli session already exists and is running 91 | # Case 1: Session exists but has exited - clean it up and start fresh 92 | if zellij list-sessions 2>/dev/null | grep "agent-cli" | grep -q "EXITED"; then 93 | echo "🧹 Found exited session 'agent-cli'. Cleaning up..." 94 | zellij delete-session agent-cli 95 | echo "🆕 Starting fresh services in Zellij..." 96 | start_new_session 97 | # Case 2: Session exists and is running - attach to it if requested 98 | elif zellij list-sessions 2>/dev/null | grep -q "agent-cli"; then 99 | if [ "$AGENT_CLI_NO_ATTACH" = "true" ]; then 100 | echo "✅ Session 'agent-cli' is already running. Not attaching as requested." 101 | else 102 | echo "🔗 Session 'agent-cli' already exists and is running. Attaching..." 103 | show_usage 104 | zellij attach agent-cli 105 | fi 106 | # Case 3: No session exists - create a new one 107 | else 108 | echo "🚀 Starting all services in Zellij..." 109 | start_new_session 110 | fi 111 | -------------------------------------------------------------------------------- /agent_cli/core/audio_format.py: -------------------------------------------------------------------------------- 1 | """Audio format conversion utilities using FFmpeg.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import shutil 7 | import subprocess 8 | import tempfile 9 | from pathlib import Path 10 | 11 | from agent_cli import constants 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | VALID_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac", ".webm") 16 | 17 | 18 | def convert_audio_to_wyoming_format( 19 | audio_data: bytes, 20 | source_filename: str, 21 | ) -> bytes: 22 | """Convert audio data to Wyoming-compatible format using FFmpeg. 
23 | 24 | Args: 25 | audio_data: Raw audio data 26 | source_filename: Source filename to help FFmpeg detect format 27 | 28 | Returns: 29 | Converted audio data as raw PCM bytes (16kHz, 16-bit, mono) 30 | 31 | Raises: 32 | RuntimeError: If FFmpeg is not available or conversion fails 33 | 34 | """ 35 | # Check if FFmpeg is available 36 | if not shutil.which("ffmpeg"): 37 | msg = "FFmpeg not found in PATH. Please install FFmpeg to convert audio formats." 38 | raise RuntimeError(msg) 39 | 40 | # Create temporary files for input and output 41 | suffix = _get_file_extension(source_filename) 42 | with ( 43 | tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as input_file, 44 | tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as output_file, 45 | ): 46 | input_path = Path(input_file.name) 47 | output_path = Path(output_file.name) 48 | 49 | try: 50 | # Write input audio data 51 | input_file.write(audio_data) 52 | input_file.flush() 53 | 54 | # Build FFmpeg command to convert to Wyoming format 55 | # -f s16le: 16-bit signed little-endian PCM 56 | # -ar 16000: 16kHz sample rate 57 | # -ac 1: mono (1 channel) 58 | cmd = [ 59 | "ffmpeg", 60 | "-y", 61 | "-i", 62 | str(input_path), 63 | "-f", 64 | "s16le", 65 | "-ar", 66 | str(constants.AUDIO_RATE), 67 | "-ac", 68 | str(constants.AUDIO_CHANNELS), 69 | str(output_path), 70 | ] 71 | 72 | logger.debug("Running FFmpeg command: %s", " ".join(cmd)) 73 | 74 | # Run FFmpeg 75 | result = subprocess.run( 76 | cmd, 77 | capture_output=True, 78 | text=False, 79 | check=False, 80 | ) 81 | 82 | if result.returncode != 0: 83 | stderr_text = result.stderr.decode("utf-8", errors="replace") 84 | logger.error("FFmpeg failed with return code %d", result.returncode) 85 | logger.error("FFmpeg stderr: %s", stderr_text) 86 | msg = f"FFmpeg conversion failed: {stderr_text}" 87 | raise RuntimeError(msg) 88 | 89 | # Read converted audio data 90 | return output_path.read_bytes() 91 | 92 | finally: 93 | # Clean up temporary files 94 | 
input_path.unlink(missing_ok=True) 95 | output_path.unlink(missing_ok=True) 96 | 97 | 98 | def _get_file_extension(filename: str) -> str: 99 | """Get file extension from filename, defaulting to .tmp. 100 | 101 | Args: 102 | filename: Source filename 103 | 104 | Returns: 105 | File extension including the dot 106 | 107 | """ 108 | filename = str(filename).lower() 109 | 110 | for ext in VALID_EXTENSIONS: 111 | if filename.endswith(ext): 112 | return ext 113 | 114 | return ".tmp" 115 | 116 | 117 | def check_ffmpeg_available() -> bool: 118 | """Check if FFmpeg is available in the system PATH. 119 | 120 | Returns: 121 | True if FFmpeg is available, False otherwise 122 | 123 | """ 124 | return shutil.which("ffmpeg") is not None 125 | -------------------------------------------------------------------------------- /scripts/setup-linux.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "🚀 Setting up agent-cli services on Linux..." 6 | 7 | # Function to install uv based on the distribution 8 | install_uv() { 9 | if command -v curl &> /dev/null; then 10 | echo "📦 Installing uv using curl..." 11 | curl -LsSf https://astral.sh/uv/install.sh | sh 12 | # Add to PATH for current session 13 | export PATH="$HOME/.local/bin:$PATH" 14 | else 15 | echo "curl not found. Please install curl first:" 16 | echo " Ubuntu/Debian: sudo apt install curl" 17 | echo " Fedora/RHEL: sudo dnf install curl" 18 | exit 1 19 | fi 20 | } 21 | 22 | # Check if uv is installed 23 | if ! command -v uv &> /dev/null; then 24 | echo "📦 Installing uv..." 25 | install_uv 26 | fi 27 | 28 | # Check for PortAudio (required for audio processing) 29 | echo "🔊 Checking PortAudio..." 30 | if ! pkg-config --exists portaudio-2.0 2>/dev/null; then 31 | echo "❌ ERROR: PortAudio development libraries are not installed." 32 | echo "" 33 | echo "PyAudio requires PortAudio. 
Install using your distribution's package manager:" 34 | echo "" 35 | echo "Ubuntu/Debian:" 36 | echo " sudo apt install portaudio19-dev" 37 | echo "" 38 | echo "Fedora/RHEL/CentOS:" 39 | echo " sudo dnf install portaudio-devel" 40 | echo "" 41 | echo "Arch Linux:" 42 | echo " sudo pacman -S portaudio" 43 | echo "" 44 | echo "openSUSE:" 45 | echo " sudo zypper install portaudio-devel" 46 | echo "" 47 | echo "After installing PortAudio, run this script again." 48 | exit 1 49 | else 50 | echo "✅ PortAudio is already installed" 51 | fi 52 | 53 | # Install Ollama 54 | echo "🧠 Checking Ollama..." 55 | if ! command -v ollama &> /dev/null; then 56 | echo "📦 Installing Ollama..." 57 | curl -fsSL https://ollama.ai/install.sh | sh 58 | echo "✅ Ollama installed successfully" 59 | else 60 | echo "✅ Ollama is already installed" 61 | fi 62 | 63 | # Check if zellij is available or offer alternatives 64 | if ! command -v zellij &> /dev/null; then 65 | echo "📺 Zellij not found. Installing..." 66 | 67 | # Try different installation methods based on what's available 68 | if command -v cargo &> /dev/null; then 69 | echo "🦀 Installing zellij via cargo..." 70 | cargo install zellij 71 | elif command -v flatpak &> /dev/null; then 72 | echo "📦 Installing zellij via flatpak..." 73 | flatpak install -y flathub org.zellij_developers.zellij 74 | else 75 | echo "📥 Installing zellij binary..." 76 | curl -L https://github.com/zellij-org/zellij/releases/latest/download/zellij-x86_64-unknown-linux-musl.tar.gz | tar -xz -C ~/.local/bin/ 77 | chmod +x ~/.local/bin/zellij 78 | export PATH="$HOME/.local/bin:$PATH" 79 | fi 80 | fi 81 | 82 | # Install agent-cli 83 | echo "🤖 Installing/upgrading agent-cli..." 84 | uv tool install --upgrade agent-cli 85 | 86 | # Preload default Ollama model 87 | echo "⬇️ Preloading default Ollama model (gemma3:4b)..." 88 | echo "⏳ This may take a few minutes depending on your internet connection..." 
89 | # Start Ollama in background, then pull model synchronously 90 | (ollama serve >/dev/null 2>&1 &) && sleep 2 && ollama pull gemma3:4b 91 | # Stop the temporary ollama server 92 | pkill -f "ollama serve" || true 93 | 94 | echo "" 95 | echo "✅ Setup complete! You can now run the services:" 96 | echo "" 97 | echo "Option 1 - Run all services at once:" 98 | echo " scripts/start-all-services.sh" 99 | echo "" 100 | echo "Option 2 - Run services individually:" 101 | echo " 1. Ollama: ollama serve" 102 | echo " 2. Whisper: scripts/run-whisper.sh" 103 | echo " 3. Piper: scripts/run-piper.sh" 104 | echo " 4. OpenWakeWord: scripts/run-openwakeword.sh" 105 | echo "" 106 | echo "📝 Note: Services use uvx to run without needing virtual environments." 107 | echo "For GPU acceleration, make sure NVIDIA drivers and CUDA are installed." 108 | echo "🎉 agent-cli has been installed and is ready to use!" 109 | -------------------------------------------------------------------------------- /tests/memory/test_proxy_passthrough.py: -------------------------------------------------------------------------------- 1 | """Tests for the memory proxy passthrough functionality.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import AsyncMock, Mock 7 | 8 | import pytest 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.memory.api import create_app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | from pytest_mock import MockerFixture 17 | 18 | 19 | @pytest.fixture 20 | def mock_memory_client(mocker: MockerFixture) -> Mock: 21 | """Mock the MemoryClient to avoid side effects.""" 22 | mock_client_cls = mocker.patch("agent_cli.memory.api.MemoryClient") 23 | mock_client = mock_client_cls.return_value 24 | mock_client.memory_path = "dummy_path" 25 | mock_client.openai_base_url = "http://upstream.test/v1" 26 | mock_client.chat_api_key = "dummy-key" 27 | mock_client.default_top_k = 5 28 | return 
mock_client 29 | 30 | 31 | @pytest.fixture 32 | def app(tmp_path: Path, mock_memory_client: Mock) -> TestClient: # noqa: ARG001 33 | """Create the FastAPI app with mocked client.""" 34 | fastapi_app = create_app( 35 | memory_path=tmp_path, 36 | openai_base_url="http://upstream.test/v1", 37 | ) 38 | return TestClient(fastapi_app) 39 | 40 | 41 | def test_proxy_passthrough_models(app: TestClient, mocker: MockerFixture) -> None: 42 | """Test that /v1/models is forwarded to the upstream.""" 43 | # Mock httpx.AsyncClient.send 44 | mock_send = AsyncMock() 45 | mock_response = Mock() 46 | mock_response.status_code = 200 47 | mock_response.content = b'{"data": [{"id": "gpt-4"}]}' 48 | mock_response.headers = {"Content-Type": "application/json"} 49 | mock_send.return_value = mock_response 50 | 51 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 52 | 53 | response = app.get("/v1/models") 54 | 55 | assert response.status_code == 200 56 | assert response.json() == {"data": [{"id": "gpt-4"}]} 57 | 58 | # Verify the request was constructed correctly 59 | assert mock_send.call_count == 1 60 | call_args = mock_send.call_args 61 | request_obj = call_args[0][0] 62 | 63 | # Check that URL was constructed correctly (v1 should not be duplicated if base has it) 64 | # base="http://upstream.test/v1", path="v1/models" -> "http://upstream.test/v1/models" 65 | assert str(request_obj.url) == "http://upstream.test/v1/models" 66 | assert request_obj.method == "GET" 67 | 68 | 69 | def test_proxy_passthrough_catchall_other_path(app: TestClient, mocker: MockerFixture) -> None: 70 | """Test that an arbitrary path is forwarded.""" 71 | mock_send = AsyncMock() 72 | mock_response = Mock() 73 | mock_response.status_code = 200 74 | mock_response.content = b"OK" 75 | mock_response.headers = {"Content-Type": "text/plain"} 76 | mock_send.return_value = mock_response 77 | 78 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 79 | 80 | response = app.post("/other/path?foo=bar", 
content=b"payload") 81 | 82 | assert response.status_code == 200 83 | assert response.content == b"OK" 84 | 85 | # Verify construction 86 | assert mock_send.call_count == 1 87 | request_obj = mock_send.call_args[0][0] 88 | assert str(request_obj.url) == "http://upstream.test/v1/other/path?foo=bar" 89 | assert request_obj.method == "POST" 90 | # Note: TestClient sends body, but httpx.build_request might consume it differently 91 | # depending on how we mock. We just verify the call happened. 92 | 93 | 94 | def test_proxy_passthrough_upstream_error(app: TestClient, mocker: MockerFixture) -> None: 95 | """Test handling of upstream errors.""" 96 | mock_send = AsyncMock() 97 | mock_send.side_effect = Exception("Connection refused") 98 | 99 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 100 | 101 | response = app.get("/v1/models") 102 | 103 | assert response.status_code == 502 104 | assert response.content == b"Upstream Proxy Error" 105 | -------------------------------------------------------------------------------- /agent_cli/memory/api.py: -------------------------------------------------------------------------------- 1 | """FastAPI application factory for memory proxy.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from fastapi import FastAPI, Request 9 | from fastapi.middleware.cors import CORSMiddleware 10 | 11 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 12 | from agent_cli.core.openai_proxy import proxy_request_to_upstream 13 | from agent_cli.memory.client import MemoryClient 14 | from agent_cli.memory.models import ChatRequest # noqa: TC001 15 | 16 | if TYPE_CHECKING: 17 | from pathlib import Path 18 | 19 | LOGGER = logging.getLogger(__name__) 20 | 21 | 22 | def create_app( 23 | memory_path: Path, 24 | openai_base_url: str, 25 | embedding_model: str = DEFAULT_OPENAI_EMBEDDING_MODEL, 26 | embedding_api_key: str | None = None, 27 | chat_api_key: str | 
None = None, 28 | default_top_k: int = 5, 29 | enable_summarization: bool = True, 30 | max_entries: int = 500, 31 | mmr_lambda: float = 0.7, 32 | recency_weight: float = 0.2, 33 | score_threshold: float = 0.35, 34 | enable_git_versioning: bool = True, 35 | ) -> FastAPI: 36 | """Create the FastAPI app for memory-backed chat.""" 37 | LOGGER.info("Initializing memory client...") 38 | 39 | client = MemoryClient( 40 | memory_path=memory_path, 41 | openai_base_url=openai_base_url, 42 | embedding_model=embedding_model, 43 | embedding_api_key=embedding_api_key, 44 | chat_api_key=chat_api_key, 45 | default_top_k=default_top_k, 46 | enable_summarization=enable_summarization, 47 | max_entries=max_entries, 48 | mmr_lambda=mmr_lambda, 49 | recency_weight=recency_weight, 50 | score_threshold=score_threshold, 51 | start_watcher=False, # We control start/stop via app events 52 | enable_git_versioning=enable_git_versioning, 53 | ) 54 | 55 | app = FastAPI(title="Memory Proxy") 56 | 57 | app.add_middleware( 58 | CORSMiddleware, 59 | allow_origins=["*"], 60 | allow_credentials=True, 61 | allow_methods=["*"], 62 | allow_headers=["*"], 63 | ) 64 | 65 | @app.post("/v1/chat/completions") 66 | async def chat_completions(request: Request, chat_request: ChatRequest) -> Any: 67 | auth_header = request.headers.get("Authorization") 68 | api_key = None 69 | if auth_header and auth_header.startswith("Bearer "): 70 | api_key = auth_header.split(" ")[1] 71 | 72 | return await client.chat( 73 | messages=chat_request.messages, 74 | conversation_id=chat_request.memory_id or "default", 75 | model=chat_request.model, 76 | stream=chat_request.stream or False, 77 | api_key=api_key, 78 | memory_top_k=chat_request.memory_top_k, 79 | recency_weight=chat_request.memory_recency_weight, 80 | score_threshold=chat_request.memory_score_threshold, 81 | ) 82 | 83 | @app.on_event("startup") 84 | async def start_watch() -> None: 85 | client.start() 86 | 87 | @app.on_event("shutdown") 88 | async def stop_watch() -> 
None: 89 | await client.stop() 90 | 91 | @app.get("/health") 92 | def health() -> dict[str, str]: 93 | return { 94 | "status": "ok", 95 | "memory_store": str(client.memory_path), 96 | "openai_base_url": client.openai_base_url, 97 | "default_top_k": str(client.default_top_k), 98 | } 99 | 100 | @app.api_route( 101 | "/{path:path}", 102 | methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"], 103 | ) 104 | async def proxy_catch_all(request: Request, path: str) -> Any: 105 | """Forward any other request to the upstream provider.""" 106 | return await proxy_request_to_upstream( 107 | request, 108 | path, 109 | client.openai_base_url, 110 | client.chat_api_key, 111 | ) 112 | 113 | return app 114 | -------------------------------------------------------------------------------- /agent_cli/core/reranker.py: -------------------------------------------------------------------------------- 1 | """Shared ONNX Cross-Encoder for reranking (used by both RAG and Memory).""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | 7 | from huggingface_hub import hf_hub_download 8 | from onnxruntime import InferenceSession 9 | from transformers import AutoTokenizer 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | 14 | def _download_onnx_model(model_name: str, onnx_filename: str) -> str: 15 | """Download the ONNX model, favoring the common `onnx/` folder layout.""" 16 | if "/" in onnx_filename: 17 | return hf_hub_download(repo_id=model_name, filename=onnx_filename) 18 | 19 | try: 20 | return hf_hub_download(repo_id=model_name, filename=onnx_filename, subfolder="onnx") 21 | except Exception as first_error: 22 | LOGGER.debug( 23 | "ONNX file not found under onnx/ for %s: %s. 
Falling back to repo root.", 24 | model_name, 25 | first_error, 26 | ) 27 | try: 28 | return hf_hub_download(repo_id=model_name, filename=onnx_filename) 29 | except Exception as second_error: 30 | LOGGER.exception( 31 | "Failed to download ONNX model %s (filename=%s)", 32 | model_name, 33 | onnx_filename, 34 | exc_info=second_error, 35 | ) 36 | raise 37 | 38 | 39 | class OnnxCrossEncoder: 40 | """A lightweight CrossEncoder using ONNX Runtime.""" 41 | 42 | def __init__( 43 | self, 44 | model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2", 45 | onnx_filename: str = "model.onnx", 46 | ) -> None: 47 | """Initialize the ONNX CrossEncoder.""" 48 | self.model_name = model_name 49 | 50 | # Download model if needed 51 | LOGGER.info("Loading ONNX model: %s", model_name) 52 | model_path = _download_onnx_model(model_name, onnx_filename) 53 | 54 | self.session = InferenceSession(model_path) 55 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 56 | 57 | def predict( 58 | self, 59 | pairs: list[tuple[str, str]], 60 | batch_size: int = 32, 61 | ) -> list[float]: 62 | """Predict relevance scores for query-document pairs.""" 63 | import numpy as np # noqa: PLC0415 64 | 65 | if not pairs: 66 | return [] 67 | 68 | all_scores = [] 69 | 70 | # Process in batches 71 | for i in range(0, len(pairs), batch_size): 72 | batch = pairs[i : i + batch_size] 73 | queries = [q for q, d in batch] 74 | docs = [d for q, d in batch] 75 | 76 | # Tokenize 77 | inputs = self.tokenizer( 78 | queries, 79 | docs, 80 | padding=True, 81 | truncation=True, 82 | return_tensors="np", 83 | max_length=512, 84 | ) 85 | 86 | # ONNX Input 87 | # Check what inputs the model expects. 
usually input_ids, attention_mask, token_type_ids 88 | # specific models might not need token_type_ids 89 | ort_inputs = { 90 | "input_ids": inputs["input_ids"].astype(np.int64), 91 | "attention_mask": inputs["attention_mask"].astype(np.int64), 92 | } 93 | if "token_type_ids" in inputs: 94 | ort_inputs["token_type_ids"] = inputs["token_type_ids"].astype(np.int64) 95 | 96 | # Run inference 97 | logits = self.session.run(None, ort_inputs)[0] 98 | 99 | # Extract scores (usually shape [batch, 1] or [batch]) 100 | batch_scores = logits.flatten() if logits.ndim > 1 else logits 101 | 102 | all_scores.extend(batch_scores.tolist()) 103 | 104 | return all_scores 105 | 106 | 107 | def get_reranker_model( 108 | model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2", 109 | ) -> OnnxCrossEncoder: 110 | """Load the CrossEncoder model.""" 111 | return OnnxCrossEncoder(model_name) 112 | 113 | 114 | def predict_relevance( 115 | model: OnnxCrossEncoder, 116 | pairs: list[tuple[str, str]], 117 | ) -> list[float]: 118 | """Predict relevance scores for query-document pairs.""" 119 | return model.predict(pairs) 120 | -------------------------------------------------------------------------------- /docs/installation/windows.md: -------------------------------------------------------------------------------- 1 | # Windows Installation Guide 2 | 3 | While `agent-cli` does not have an automated setup script for native Windows, you can achieve a seamless experience by using a **Split Setup**. 4 | 5 | This approach uses **WSL 2 (Windows Subsystem for Linux)** to run the heavy AI services (the "Brain") while running the lightweight `agent-cli` tool natively on Windows (the "Ears") to access your microphone and clipboard. 6 | 7 | ## Prerequisites 8 | 9 | 1. **WSL 2**: Ensure you have WSL 2 installed (typically Ubuntu). 10 | * [How to install WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 11 | 2. **Git**: Installed in both WSL and Windows. 12 | 3. 
**uv**: The Python package manager (installed on Windows). 13 | 14 | --- 15 | 16 | ## Part 1: The "Brain" (WSL Side) 17 | 18 | We will run the backend services (Ollama, Whisper, Piper, etc.) inside WSL. 19 | 20 | 1. **Open your WSL terminal** (e.g., Ubuntu). 21 | 2. **Clone the repository and run the Linux setup:** 22 | 23 | ```bash 24 | git clone https://github.com/basnijholt/agent-cli.git 25 | cd agent-cli 26 | ./scripts/setup-linux.sh 27 | ``` 28 | 29 | 3. **Start the services:** 30 | 31 | ```bash 32 | ./scripts/start-all-services.sh 33 | ``` 34 | 35 | This will launch a Zellij session with all services running. By default, WSL forwards these ports (11434, 10300, 10200, 10400) to your Windows `localhost`. 36 | 37 | --- 38 | 39 | ## Part 2: The "Ears" (Windows Side) 40 | 41 | Now we install the client on Windows so it can access your hardware (microphone) and interact with your desktop (clipboard). 42 | 43 | ### 1. Install uv 44 | If you haven't installed `uv` yet, run this in PowerShell: 45 | ```powershell 46 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 47 | ``` 48 | `uv` will automatically manage the required Python version for the tool. 49 | 50 | ### 2. Install agent-cli 51 | Run the following command to install the tool: 52 | 53 | ```powershell 54 | uv tool install agent-cli 55 | ``` 56 | 57 | > [!NOTE] 58 | > `agent-cli` uses **sounddevice** for audio, which automatically includes the necessary PortAudio binaries for Windows. You typically do not need to install any external drivers or libraries manually. 59 | 60 | ### 3. 
Test the Connection 61 | Run a command in PowerShell to verify that Windows can talk to the WSL services: 62 | 63 | ```powershell 64 | # This records audio on Windows -> sends to WSL -> copies text to Windows clipboard 65 | agent-cli transcribe 66 | ``` 67 | 68 | --- 69 | 70 | ## Part 3: Automation (AutoHotkey) 71 | 72 | To invoke these commands globally (like the macOS/Linux hotkeys), use [AutoHotkey v2](https://www.autohotkey.com/). 73 | 74 | 1. Create a file named `agent-cli.ahk`. 75 | 2. Paste the following script: 76 | 77 | ```autohotkey 78 | #Requires AutoHotkey v2.0 79 | Persistent ; Keep script running with tray icon 80 | 81 | ; Win+Shift+W to toggle transcription (W for Whisper) 82 | #+w::{ 83 | statusFile := A_Temp . "\agent-cli-status.txt" 84 | cmd := Format('{1} /C agent-cli transcribe --status > "{2}" 2>&1', A_ComSpec, statusFile) 85 | RunWait(cmd, , "Hide") 86 | status := FileRead(statusFile) 87 | if InStr(status, "not running") { 88 | TrayTip("🎤 Starting transcription...", "agent-cli", 1) 89 | Run("agent-cli transcribe --toggle --input-device-index 1", , "Hide") ; adjust device index if needed 90 | } else { 91 | TrayTip("🛑 Stopping transcription...", "agent-cli", 1) 92 | Run("agent-cli transcribe --toggle", , "Hide") 93 | } 94 | } 95 | 96 | ; Win+Shift+A to autocorrect clipboard 97 | #+a::{ 98 | TrayTip("✍️ Autocorrecting clipboard...", "agent-cli", 1) 99 | Run("agent-cli autocorrect", , "Hide") 100 | } 101 | 102 | ; Win+Shift+E to voice edit selection 103 | #+e::{ 104 | Send("^c") 105 | ClipWait(1) 106 | TrayTip("🗣️ Voice editing selection...", "agent-cli", 1) 107 | Run("agent-cli voice-edit --input-device-index 1", , "Hide") ; adjust device index if needed 108 | } 109 | ``` 110 | 111 | 3. Double-click the script to run it. 112 | 113 | > [!TIP] 114 | > Using `--toggle` stops an existing background recorder if it's already running, so you can press the same hotkey to start/stop the session without leaving a stray process behind. 
115 | 116 | **Note on Audio Devices:** 117 | If `agent-cli` doesn't pick up your microphone, run `agent-cli transcribe --list-devices` to find the correct `--input-device-index`. 118 | -------------------------------------------------------------------------------- /tests/test_asr.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the asr module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import AsyncMock, MagicMock, patch 6 | 7 | import pytest 8 | from wyoming.asr import Transcribe, Transcript, TranscriptChunk 9 | from wyoming.audio import AudioChunk, AudioStart, AudioStop 10 | 11 | from agent_cli.services import asr 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_send_audio() -> None: 16 | """Test that _send_audio sends the correct events.""" 17 | # Arrange 18 | client = AsyncMock() 19 | stream = MagicMock() 20 | stop_event = MagicMock() 21 | stop_event.is_set.side_effect = [False, True] # Allow one iteration then stop 22 | stop_event.ctrl_c_pressed = False 23 | 24 | mock_data = MagicMock() 25 | mock_data.tobytes.return_value = b"fake_audio_chunk" 26 | stream.read.return_value = (mock_data, False) 27 | logger = MagicMock() 28 | 29 | # Act 30 | # No need to create a task and sleep, just await the coroutine. 31 | # The side_effect will stop the loop. 
32 | await asr._send_audio( 33 | client, 34 | stream, 35 | stop_event, 36 | logger, 37 | live=MagicMock(), 38 | quiet=False, 39 | save_recording=False, 40 | ) 41 | 42 | # Assert 43 | assert client.write_event.call_count == 4 44 | client.write_event.assert_any_call(Transcribe().event()) 45 | client.write_event.assert_any_call( 46 | AudioStart(rate=16000, width=2, channels=1).event(), 47 | ) 48 | client.write_event.assert_any_call( 49 | AudioChunk( 50 | rate=16000, 51 | width=2, 52 | channels=1, 53 | audio=b"fake_audio_chunk", 54 | ).event(), 55 | ) 56 | client.write_event.assert_any_call(AudioStop().event()) 57 | 58 | 59 | @pytest.mark.asyncio 60 | async def test_receive_text() -> None: 61 | """Test that receive_transcript correctly processes events.""" 62 | # Arrange 63 | client = AsyncMock() 64 | client.read_event.side_effect = [ 65 | TranscriptChunk(text="hello").event(), 66 | Transcript(text="hello world").event(), 67 | None, # To stop the loop 68 | ] 69 | logger = MagicMock() 70 | chunk_callback = MagicMock() 71 | final_callback = MagicMock() 72 | 73 | # Act 74 | result = await asr._receive_transcript( 75 | client, 76 | logger, 77 | chunk_callback=chunk_callback, 78 | final_callback=final_callback, 79 | ) 80 | 81 | # Assert 82 | assert result == "hello world" 83 | chunk_callback.assert_called_once_with("hello") 84 | final_callback.assert_called_once_with("hello world") 85 | 86 | 87 | def test_create_transcriber(): 88 | """Test that the correct transcriber is returned.""" 89 | provider_cfg = MagicMock() 90 | provider_cfg.asr_provider = "openai" 91 | transcriber = asr.create_transcriber( 92 | provider_cfg, 93 | MagicMock(), 94 | MagicMock(), 95 | MagicMock(), 96 | ) 97 | assert transcriber.func is asr._transcribe_live_audio_openai 98 | 99 | provider_cfg.asr_provider = "wyoming" 100 | transcriber = asr.create_transcriber( 101 | provider_cfg, 102 | MagicMock(), 103 | MagicMock(), 104 | MagicMock(), 105 | ) 106 | assert transcriber.func is 
asr._transcribe_live_audio_wyoming 107 | 108 | 109 | def test_create_recorded_audio_transcriber(): 110 | """Test that the correct recorded audio transcriber is returned.""" 111 | provider_cfg = MagicMock() 112 | provider_cfg.asr_provider = "openai" 113 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 114 | assert transcriber is asr.transcribe_audio_openai 115 | 116 | provider_cfg.asr_provider = "wyoming" 117 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 118 | assert transcriber is asr._transcribe_recorded_audio_wyoming 119 | 120 | 121 | @pytest.mark.asyncio 122 | @patch("agent_cli.services.asr.wyoming_client_context", side_effect=ConnectionRefusedError) 123 | async def test_transcribe_recorded_audio_wyoming_connection_error( 124 | mock_wyoming_client_context: MagicMock, 125 | ): 126 | """Test that transcribe_recorded_audio_wyoming handles ConnectionRefusedError.""" 127 | result = await asr._transcribe_recorded_audio_wyoming( 128 | audio_data=b"test", 129 | wyoming_asr_cfg=MagicMock(), 130 | logger=MagicMock(), 131 | ) 132 | assert result == "" 133 | mock_wyoming_client_context.assert_called_once() 134 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel", "versioningit"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "agent-cli" 7 | description = "A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance." 
8 | dynamic = ["version"] 9 | authors = [{ name = "Bas Nijholt", email = "bas@nijho.lt" }] 10 | dependencies = [ 11 | "wyoming", 12 | "sounddevice", 13 | "numpy", 14 | "rich", 15 | "pyperclip", 16 | "pydantic-ai-slim[openai,duckduckgo,vertexai]", 17 | "typer", 18 | "openai", 19 | "dotenv", 20 | "google-genai>=1.25.0", 21 | "httpx", 22 | "psutil; sys_platform == 'win32'", 23 | ] 24 | requires-python = ">=3.11" 25 | 26 | [project.readme] 27 | file = "README.md" 28 | content-type = "text/markdown" 29 | 30 | [project.urls] 31 | Homepage = "https://github.com/basnijholt/agent-cli" 32 | 33 | [project.optional-dependencies] 34 | server = [ 35 | "fastapi[standard]", 36 | ] 37 | rag = [ 38 | "fastapi[standard]", 39 | "chromadb>=0.4.22", 40 | "onnxruntime>=1.17.0", 41 | "huggingface-hub>=0.20.0", 42 | "transformers>=4.30.0", 43 | "watchfiles>=0.21.0", 44 | # Until here same as 'memory' extras 45 | "markitdown[docx,pdf,pptx]>=0.1.3", 46 | ] 47 | memory = [ 48 | "fastapi[standard]", 49 | "chromadb>=0.4.22", 50 | "onnxruntime>=1.17.0", 51 | "huggingface-hub>=0.20.0", 52 | "transformers>=4.30.0", 53 | "watchfiles>=0.21.0", 54 | # Until here same as 'rag' extras 55 | "pyyaml>=6.0.0", 56 | ] 57 | test = [ 58 | "pytest>=7.0.0", 59 | "pytest-asyncio>=0.20.0", 60 | "pytest-cov>=4.0.0", 61 | "pydantic-ai-slim[openai]", 62 | "pytest-timeout", 63 | "pytest-mock", 64 | ] 65 | dev = [ 66 | "agent-cli[test]", 67 | "pre-commit>=3.0.0", 68 | "versioningit", 69 | "markdown-code-runner", 70 | "ruff", 71 | "notebook", 72 | ] 73 | speed = ["audiostretchy>=1.3.0"] 74 | 75 | # Duplicate of test+dev optional-dependencies groups 76 | [dependency-groups] 77 | dev = [ 78 | "pytest>=7.0.0", 79 | "pytest-asyncio>=0.20.0", 80 | "pytest-cov>=4.0.0", 81 | "pydantic-ai-slim[openai]", 82 | "pytest-timeout", 83 | "pytest-mock", 84 | "pre-commit>=3.0.0", 85 | "versioningit", 86 | "markdown-code-runner", 87 | "ruff", 88 | "notebook", 89 | "audiostretchy>=1.3.0", 90 | "pre-commit-uv>=4.1.4", 91 | ] 92 | 93 | 
[project.scripts] 94 | agent-cli = "agent_cli.cli:app" 95 | 96 | [tool.setuptools.packages.find] 97 | include = ["agent_cli.*", "agent_cli"] 98 | 99 | [tool.setuptools.package-data] 100 | "agent_cli" = ["py.typed", "scripts/**/*", "example-config.toml"] 101 | 102 | [tool.pytest.ini_options] 103 | asyncio_default_fixture_loop_scope = "function" 104 | filterwarnings = [ 105 | "ignore::DeprecationWarning", 106 | "ignore::PendingDeprecationWarning", 107 | ] 108 | timeout = 10 109 | norecursedirs = ["rag_db", "memory_db", "build", "dist"] 110 | addopts = [ 111 | "--cov=agent_cli", 112 | "--cov-report=term", 113 | "--cov-report=html", 114 | "--cov-report=xml", 115 | "--no-cov-on-fail", 116 | "-v", 117 | ] 118 | 119 | [tool.coverage.run] 120 | omit = [] 121 | 122 | [tool.coverage.report] 123 | exclude_lines = [ 124 | "pragma: no cover", 125 | "raise NotImplementedError", 126 | "if TYPE_CHECKING:", 127 | 'if __name__ == "__main__":', 128 | ] 129 | 130 | [tool.black] 131 | line-length = 100 132 | 133 | [tool.ruff] 134 | line-length = 100 135 | target-version = "py311" 136 | 137 | [tool.ruff.lint] 138 | select = ["ALL"] 139 | ignore = [ 140 | "T20", 141 | "S101", 142 | "S603", 143 | "ANN401", 144 | "D402", 145 | "PLW0603", 146 | "D401", 147 | "SLF001", 148 | "PLR0913", 149 | "TD002", 150 | "ANN002", 151 | "ANN003", 152 | "E501", 153 | "TRY300", 154 | "FBT001", # Boolean-typed positional argument in function definition 155 | "FBT002", # Boolean-typed keyword-only argument in function definition 156 | "BLE001", # Do not catch blind exception: `Exception` 157 | ] 158 | 159 | [tool.ruff.lint.per-file-ignores] 160 | "tests/*" = ["SLF001", "D103", "E501", "PLR2004", "S101", "ANN201", "FBT003"] 161 | "tests/test_examples.py" = ["E501"] 162 | ".github/*" = ["INP001"] 163 | "example/*" = ["INP001", "D100"] 164 | "docs/*" = ["INP001", "E501"] 165 | 166 | [tool.ruff.lint.mccabe] 167 | max-complexity = 18 168 | 169 | [tool.mypy] 170 | python_version = "3.11" 171 | 172 | 
[tool.versioningit] 173 | default-version = "0.0.0" 174 | 175 | [tool.versioningit.vcs] 176 | method = "git" 177 | match = ["v*"] 178 | default-tag = "0.0.0" 179 | -------------------------------------------------------------------------------- /agent_cli/agents/_voice_agent_common.py: -------------------------------------------------------------------------------- 1 | r"""Common functionalities for voice-based agents.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import time 7 | from typing import TYPE_CHECKING 8 | 9 | import pyperclip 10 | 11 | from agent_cli.core.utils import print_input_panel, print_with_style 12 | from agent_cli.services import asr 13 | from agent_cli.services.llm import process_and_update_clipboard 14 | from agent_cli.services.tts import handle_tts_playback 15 | 16 | if TYPE_CHECKING: 17 | from rich.live import Live 18 | 19 | from agent_cli import config 20 | 21 | LOGGER = logging.getLogger() 22 | 23 | 24 | async def get_instruction_from_audio( 25 | *, 26 | audio_data: bytes, 27 | provider_cfg: config.ProviderSelection, 28 | audio_input_cfg: config.AudioInput, 29 | wyoming_asr_cfg: config.WyomingASR, 30 | openai_asr_cfg: config.OpenAIASR, 31 | ollama_cfg: config.Ollama, 32 | logger: logging.Logger, 33 | quiet: bool, 34 | ) -> str | None: 35 | """Transcribe audio data and return the instruction.""" 36 | try: 37 | start_time = time.monotonic() 38 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 39 | instruction = await transcriber( 40 | audio_data=audio_data, 41 | provider_cfg=provider_cfg, 42 | audio_input_cfg=audio_input_cfg, 43 | wyoming_asr_cfg=wyoming_asr_cfg, 44 | openai_asr_cfg=openai_asr_cfg, 45 | ollama_cfg=ollama_cfg, 46 | logger=logger, 47 | quiet=quiet, 48 | ) 49 | elapsed = time.monotonic() - start_time 50 | 51 | if not instruction or not instruction.strip(): 52 | if not quiet: 53 | print_with_style( 54 | "No speech detected in recording", 55 | style="yellow", 56 | ) 57 | return None 58 
| 59 | if not quiet: 60 | print_input_panel( 61 | instruction, 62 | title="🎯 Instruction", 63 | style="bold yellow", 64 | subtitle=f"[dim]took {elapsed:.2f}s[/dim]", 65 | ) 66 | 67 | return instruction 68 | 69 | except Exception as e: 70 | logger.exception("Failed to process audio with ASR") 71 | if not quiet: 72 | print_with_style(f"ASR processing failed: {e}", style="red") 73 | return None 74 | 75 | 76 | async def process_instruction_and_respond( 77 | *, 78 | instruction: str, 79 | original_text: str, 80 | provider_cfg: config.ProviderSelection, 81 | general_cfg: config.General, 82 | ollama_cfg: config.Ollama, 83 | openai_llm_cfg: config.OpenAILLM, 84 | gemini_llm_cfg: config.GeminiLLM, 85 | audio_output_cfg: config.AudioOutput, 86 | wyoming_tts_cfg: config.WyomingTTS, 87 | openai_tts_cfg: config.OpenAITTS, 88 | kokoro_tts_cfg: config.KokoroTTS, 89 | system_prompt: str, 90 | agent_instructions: str, 91 | live: Live | None, 92 | logger: logging.Logger, 93 | ) -> None: 94 | """Process instruction with LLM and handle TTS response.""" 95 | # Process with LLM if clipboard mode is enabled 96 | if general_cfg.clipboard: 97 | await process_and_update_clipboard( 98 | system_prompt=system_prompt, 99 | agent_instructions=agent_instructions, 100 | provider_cfg=provider_cfg, 101 | ollama_cfg=ollama_cfg, 102 | openai_cfg=openai_llm_cfg, 103 | gemini_cfg=gemini_llm_cfg, 104 | logger=logger, 105 | original_text=original_text, 106 | instruction=instruction, 107 | clipboard=general_cfg.clipboard, 108 | quiet=general_cfg.quiet, 109 | live=live, 110 | ) 111 | 112 | # Handle TTS response if enabled 113 | if audio_output_cfg.enable_tts: 114 | response_text = pyperclip.paste() 115 | if response_text and response_text.strip(): 116 | await handle_tts_playback( 117 | text=response_text, 118 | provider_cfg=provider_cfg, 119 | audio_output_cfg=audio_output_cfg, 120 | wyoming_tts_cfg=wyoming_tts_cfg, 121 | openai_tts_cfg=openai_tts_cfg, 122 | kokoro_tts_cfg=kokoro_tts_cfg, 123 | 
save_file=general_cfg.save_file, 124 | quiet=general_cfg.quiet, 125 | logger=logger, 126 | play_audio=not general_cfg.save_file, 127 | status_message="🔊 Speaking response...", 128 | description="TTS audio", 129 | live=live, 130 | ) 131 | -------------------------------------------------------------------------------- /docs/installation/macos.md: -------------------------------------------------------------------------------- 1 | # macOS Native Installation 2 | 3 | Native macOS setup with full Metal GPU acceleration for optimal performance. 4 | 5 | > **🍎 Recommended for macOS** 6 | > This setup provides ~10x better performance than Docker by utilizing Metal GPU acceleration. 7 | 8 | ## Prerequisites 9 | 10 | - macOS 12 Monterey or later 11 | - 8GB+ RAM (16GB+ recommended) 12 | - 10GB free disk space 13 | - Homebrew installed 14 | 15 | ## Quick Start 16 | 17 | 1. **Run the setup script:** 18 | 19 | ```bash 20 | scripts/setup-macos.sh 21 | ``` 22 | 23 | 2. **Start all services:** 24 | 25 | ```bash 26 | scripts/start-all-services.sh 27 | ``` 28 | 29 | 3. **Install agent-cli:** 30 | 31 | ```bash 32 | uv tool install agent-cli 33 | # or: pip install agent-cli 34 | ``` 35 | 36 | 4. 
**Test the setup:** 37 | ```bash 38 | agent-cli autocorrect "this has an eror" 39 | ``` 40 | 41 | ## What the Setup Does 42 | 43 | The `setup-macos.sh` script: 44 | 45 | - ✅ Checks for Homebrew 46 | - ✅ Installs `uv` if needed 47 | - ✅ Installs/checks Ollama (native macOS app) 48 | - ✅ Installs Zellij for session management 49 | - ✅ Prepares Wyoming service runners 50 | 51 | ## Services Overview 52 | 53 | | Service | Implementation | Port | GPU Support | 54 | | ---------------- | ---------------------- | ----- | -------------------- | 55 | | **Ollama** | Native macOS app | 11434 | ✅ Metal GPU | 56 | | **Whisper** | Wyoming MLX Whisper | 10300 | ✅ Apple Silicon MLX | 57 | | **Piper** | Wyoming Piper (via uv) | 10200 | N/A | 58 | | **OpenWakeWord** | Wyoming OpenWakeWord | 10400 | N/A | 59 | 60 | > **Note:** Whisper uses [wyoming-mlx-whisper](https://github.com/basnijholt/wyoming-mlx-whisper) with `whisper-large-v3-turbo` for near real-time transcription on Apple Silicon. 61 | 62 | ## Session Management with Zellij 63 | 64 | The setup uses Zellij for managing all services in one session: 65 | 66 | ### Starting Services 67 | 68 | ```bash 69 | scripts/start-all-services.sh 70 | ``` 71 | 72 | ### Zellij Commands 73 | 74 | - `Ctrl-O d` - Detach (services keep running) 75 | - `zellij attach agent-cli` - Reattach to session 76 | - `zellij list-sessions` - List all sessions 77 | - `zellij kill-session agent-cli` - Stop all services 78 | - `Alt + arrow keys` - Navigate between panes 79 | - `Ctrl-Q` - Quit (stops all services) 80 | 81 | ## Manual Service Management 82 | 83 | If you prefer running services individually: 84 | 85 | ```bash 86 | # Terminal 1: Ollama (native GPU acceleration) 87 | ollama serve 88 | 89 | # Terminal 2: Whisper (CPU optimized) 90 | scripts/run-whisper.sh 91 | 92 | # Terminal 3: Piper (Apple Silicon compatible) 93 | scripts/run-piper.sh 94 | 95 | # Terminal 4: OpenWakeWord (macOS compatible fork) 96 | scripts/run-openwakeword.sh 97 | ``` 98 | 99 | ## 
Why Native Setup? 100 | 101 | - **10x faster than Docker** - Full Metal GPU acceleration 102 | - **Better resource usage** - Native integration with macOS 103 | - **Automatic model management** - Services handle downloads 104 | 105 | ## Troubleshooting 106 | 107 | ### Terminal-notifier Popup Issues 108 | 109 | - Ensure Settings > Notifications > terminal-notifier > Allow Notifications is enabled. 110 | - For a persistent “Listening…” badge, set the Alert style to **Persistent** (or choose **Alerts** on macOS versions that still offer Alert/Banner). This keeps the recording indicator visible while other notifications still auto-dismiss. 111 | 112 | ### Ollama Issues 113 | 114 | ```bash 115 | # Check if Ollama is running 116 | ollama list 117 | 118 | # Pull a model manually 119 | ollama pull gemma3:4b 120 | 121 | # Check Ollama logs 122 | tail -f ~/.ollama/logs/server.log 123 | ``` 124 | 125 | ### Service Port Conflicts 126 | 127 | ```bash 128 | # Check what's using a port 129 | lsof -i :11434 130 | lsof -i :10300 131 | lsof -i :10200 132 | lsof -i :10400 133 | ``` 134 | 135 | ### uv/Python Issues 136 | 137 | ```bash 138 | # Reinstall uv 139 | brew reinstall uv 140 | 141 | # Check uv installation 142 | uv --version 143 | ``` 144 | 145 | ### Zellij Issues 146 | 147 | ```bash 148 | # Kill stuck sessions 149 | zellij kill-all-sessions 150 | 151 | # Check session status 152 | zellij list-sessions 153 | 154 | # Start without Zellij (manual) 155 | # Run each script in separate terminals 156 | ``` 157 | 158 | ### Memory/Performance Issues 159 | 160 | - Close other apps to free RAM 161 | - Check Activity Monitor for high CPU/Memory usage 162 | - Services will automatically download required models 163 | 164 | ## Alternative: Docker 165 | 166 | If you prefer Docker despite performance limitations: 167 | 168 | - [Docker Setup Guide](docker.md) 169 | - Note: ~10x slower due to no GPU acceleration 170 | 
-------------------------------------------------------------------------------- /agent_cli/memory/_prompt.py: -------------------------------------------------------------------------------- 1 | """Centralized prompts for memory LLM calls.""" 2 | 3 | FACT_SYSTEM_PROMPT = """ 4 | You are a memory extractor. From the latest exchange, return 1-3 concise fact sentences based ONLY on user messages. 5 | 6 | Guidelines: 7 | - If there is no meaningful fact, return []. 8 | - Ignore assistant/system content completely. 9 | - Facts must be short, readable sentences (e.g., "The user's wife is Anne.", "Planning a trip to Japan next spring."). 10 | - Do not return acknowledgements, questions, or meta statements; only factual statements from the user. 11 | - NEVER output refusals like "I cannot..." or "I don't know..." or "I don't have that information". If you can't extract a fact, return []. 12 | - Return a JSON list of strings. 13 | 14 | Few-shots: 15 | - Input: User: "Hi." / Assistant: "Hello" -> [] 16 | - Input: User: "My wife is Anne." / Assistant: "Got it." -> ["The user's wife is Anne."] 17 | - Input: User: "I like biking on weekends." / Assistant: "Cool!" -> ["User likes biking on weekends."] 18 | """.strip() 19 | 20 | FACT_INSTRUCTIONS = """ 21 | Return only factual sentences grounded in the user text. No assistant acknowledgements or meta-text. 22 | """.strip() 23 | 24 | UPDATE_MEMORY_PROMPT = """You are a smart memory manager which controls the memory of a system. 25 | You can perform four operations: (1) ADD into the memory, (2) UPDATE the memory, (3) DELETE from the memory, and (4) NONE (no change). 26 | 27 | Compare new facts with existing memory. 
For each new fact, decide whether to: 28 | - ADD: Add it to the memory as a new element (new information not present in any existing memory) 29 | - UPDATE: Update an existing memory element (only if facts are about THE SAME TOPIC, e.g., both about pizza preferences) 30 | - DELETE: Delete an existing memory element (if new fact explicitly contradicts it) 31 | - NONE: Make no change (if fact is already present, a duplicate, or the existing memory is unrelated to new facts) 32 | 33 | **Guidelines:** 34 | 35 | 1. **ADD**: If the new fact contains new information not present in any existing memory, add it with a new ID. 36 | - Existing unrelated memories should have event "NONE". 37 | - **Example**: 38 | - Current memory: [{"id": 0, "text": "User is a software engineer"}] 39 | - New facts: ["Name is John"] 40 | - Output: [ 41 | {"id": 0, "text": "User is a software engineer", "event": "NONE"}, 42 | {"id": 1, "text": "Name is John", "event": "ADD"} 43 | ] 44 | 45 | 2. **UPDATE**: Only if the new fact refines/expands an existing memory about THE SAME TOPIC. 46 | - Keep the same ID, update the text. 47 | - Example: "User likes pizza" + "User loves pepperoni pizza" → UPDATE (same topic: pizza) 48 | - Example: "Met Sarah today" + "Went running" → NOT same topic, do NOT update! 49 | - **Example**: 50 | - Current memory: [{"id": 0, "text": "User likes pizza"}] 51 | - New facts: ["User loves pepperoni pizza"] 52 | - Output: [{"id": 0, "text": "User loves pepperoni pizza", "event": "UPDATE"}] 53 | 54 | 3. **DELETE**: If the new fact explicitly contradicts an existing memory. 55 | - **Example**: 56 | - Current memory: [{"id": 0, "text": "Loves pizza"}, {"id": 1, "text": "Name is John"}] 57 | - New facts: ["Hates pizza"] 58 | - Output: [ 59 | {"id": 0, "text": "Loves pizza", "event": "DELETE"}, 60 | {"id": 1, "text": "Name is John", "event": "NONE"}, 61 | {"id": 2, "text": "Hates pizza", "event": "ADD"} 62 | ] 63 | 64 | 4. 
**NONE**: If the new fact is already present or existing memory is unrelated to new facts. 65 | - **Example**: 66 | - Current memory: [{"id": 0, "text": "Name is John"}] 67 | - New facts: ["Name is John"] 68 | - Output: [{"id": 0, "text": "Name is John", "event": "NONE"}] 69 | 70 | 5. **IMPORTANT - Unrelated topics example**: 71 | - Current memory: [{"id": 0, "text": "Met Sarah to discuss quantum computing"}] 72 | - New facts: ["Went for a 5km run"] 73 | - These are COMPLETELY DIFFERENT topics (meeting vs running). Do NOT use UPDATE! 74 | - Output: [ 75 | {"id": 0, "text": "Met Sarah to discuss quantum computing", "event": "NONE"}, 76 | {"id": 1, "text": "Went for a 5km run", "event": "ADD"} 77 | ] 78 | 79 | **CRITICAL RULES:** 80 | - You MUST return ALL memories (existing + new) in your response. 81 | - Each existing memory MUST have an event (NONE, UPDATE, or DELETE). 82 | - Each genuinely NEW fact (not related to any existing memory) MUST be ADDed with a new ID. 83 | - Do NOT use UPDATE for unrelated topics! "Met Sarah" and "Went running" are DIFFERENT topics → use NONE for existing + ADD for new. 84 | 85 | Return ONLY a JSON list. No prose or code fences.""".strip() 86 | 87 | SUMMARY_PROMPT = """ 88 | You are a concise conversation summarizer. Update the running summary with the new facts. 89 | Keep it brief, factual, and focused on durable information; do not restate transient chit-chat. 90 | Prefer aggregating related facts into compact statements; drop redundancies. 91 | """.strip() 92 | --------------------------------------------------------------------------------