├── .env.example ├── agent_cli ├── py.typed ├── scripts ├── core │ ├── __init__.py │ ├── deps.py │ ├── watch.py │ ├── transcription_logger.py │ ├── sse.py │ ├── chroma.py │ ├── audio_format.py │ └── reranker.py ├── __main__.py ├── install │ ├── __init__.py │ ├── hotkeys.py │ ├── services.py │ └── common.py ├── __init__.py ├── agents │ ├── __init__.py │ ├── memory │ │ └── __init__.py │ ├── server.py │ └── _voice_agent_common.py ├── rag │ ├── __init__.py │ ├── _prompt.py │ ├── models.py │ ├── _store.py │ └── _indexer.py ├── constants.py ├── memory │ ├── __init__.py │ ├── entities.py │ ├── _streaming.py │ ├── _tasks.py │ ├── _filters.py │ ├── models.py │ ├── api.py │ └── _prompt.py ├── services │ ├── _wyoming_utils.py │ └── __init__.py └── cli.py ├── scripts ├── .runtime │ └── .gitkeep ├── __init__.py ├── macos-hotkeys │ ├── skhd-config-example │ ├── toggle-autocorrect.sh │ ├── toggle-transcription.sh │ ├── README.md │ └── toggle-voice-edit.sh ├── run-whisper-macos.sh ├── run-whisper.sh ├── run-openwakeword.sh ├── nvidia-asr-server │ ├── pyproject.toml │ ├── shell.nix │ └── README.md ├── run-piper.sh ├── run-whisper-linux.sh ├── linux-hotkeys │ ├── toggle-autocorrect.sh │ ├── toggle-voice-edit.sh │ ├── toggle-transcription.sh │ └── README.md ├── zellij_help.txt ├── setup-macos.sh ├── setup-macos-hotkeys.sh ├── setup-linux-hotkeys.sh ├── start-all-services.sh └── setup-linux.sh ├── tests ├── memory │ ├── __init__.py │ ├── test_api_health.py │ ├── test_files.py │ ├── test_indexer.py │ ├── test_utils.py │ └── test_proxy_passthrough.py ├── core │ ├── __init__.py │ ├── test_chroma.py │ ├── test_watch.py │ ├── test_sse.py │ └── test_audio_format.py ├── rag │ ├── __init__.py │ ├── test_store.py │ ├── test_retriever.py │ ├── test_history.py │ ├── test_rag_proxy_passthrough.py │ └── test_indexer.py ├── __init__.py ├── agents │ ├── __init__.py │ ├── test_tts_common_extra.py │ ├── test_wake_word_assistant.py │ ├── test_speak_e2e.py │ ├── test_transcribe_agent.py │ ├── 
test_voice_edit.py │ └── test_transcribe_e2e.py ├── mocks │ ├── __init__.py │ ├── llm.py │ ├── audio.py │ └── wyoming.py ├── test_env_vars.py ├── test_llm_gemini.py ├── test_tools.py ├── test_cli.py ├── test_wyoming_utils.py ├── conftest.py └── test_asr.py ├── example.agent-cli-config.toml ├── .github ├── release-drafter.yml ├── workflows │ ├── toc.yaml │ ├── release-drafter.yml │ ├── release.yml │ ├── automerge.yml │ ├── pytest.yml │ └── update-readme.yml └── renovate.json ├── docker ├── Dockerfile └── docker-compose.yml ├── shell.nix ├── .pre-commit-config.yaml ├── .gitignore ├── LICENSE ├── .cursorrules ├── docs └── installation │ ├── README.md │ ├── docker.md │ ├── windows.md │ └── macos.md └── pyproject.toml /.env.example: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_cli/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_cli/scripts: -------------------------------------------------------------------------------- 1 | ../scripts -------------------------------------------------------------------------------- /scripts/.runtime/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory tests.""" 2 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core helper tests.""" 2 | -------------------------------------------------------------------------------- /tests/rag/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Tests for RAG module.""" 2 | -------------------------------------------------------------------------------- /example.agent-cli-config.toml: -------------------------------------------------------------------------------- 1 | agent_cli/example-config.toml -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the agent-cli package.""" 2 | -------------------------------------------------------------------------------- /tests/agents/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the agents package.""" 2 | -------------------------------------------------------------------------------- /agent_cli/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core functionalities for the agent CLI.""" 2 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | template: | 2 | ## What’s Changed 3 | 4 | $CHANGES 5 | -------------------------------------------------------------------------------- /tests/mocks/__init__.py: -------------------------------------------------------------------------------- 1 | """Test mocks package.""" 2 | 3 | from __future__ import annotations 4 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Scripts package containing installation and service management scripts.""" 2 | -------------------------------------------------------------------------------- /docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM ollama/ollama 2 | 3 | COPY examples/run_ollama.sh /usr/local/bin/run_ollama.sh 4 | 5 | ENTRYPOINT ["run_ollama.sh"] 6 | -------------------------------------------------------------------------------- /agent_cli/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point for the Agent CLI.""" 2 | 3 | from agent_cli.cli import app 4 | 5 | if __name__ == "__main__": 6 | app() 7 | -------------------------------------------------------------------------------- /agent_cli/install/__init__.py: -------------------------------------------------------------------------------- 1 | """Installation commands for agent-cli.""" 2 | 3 | from __future__ import annotations 4 | 5 | __all__ = ["hotkeys", "services"] 6 | -------------------------------------------------------------------------------- /agent_cli/__init__.py: -------------------------------------------------------------------------------- 1 | """A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.""" 2 | 3 | from importlib.metadata import version 4 | 5 | __version__ = version("agent-cli") 6 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/skhd-config-example: -------------------------------------------------------------------------------- 1 | # Simple skhd configuration for transcription toggle 2 | # Save to ~/.config/skhd/skhdrc 3 | 4 | # Press Cmd+Shift+R to toggle transcription 5 | cmd + shift - r : /path/to/toggle-transcription.sh 6 | -------------------------------------------------------------------------------- /scripts/run-whisper-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # macOS: MLX-based Whisper for Apple Silicon 3 | echo "🎤 Starting Wyoming MLX Whisper on port 10300..." 
4 | echo "🍎 Using MLX for Apple Silicon optimization" 5 | 6 | uvx --python 3.12 wyoming-mlx-whisper 7 | -------------------------------------------------------------------------------- /.github/workflows/toc.yaml: -------------------------------------------------------------------------------- 1 | on: push 2 | name: TOC Generator 3 | jobs: 4 | generateTOC: 5 | name: TOC Generator 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: technote-space/toc-generator@v4 9 | with: 10 | TOC_TITLE: "" 11 | -------------------------------------------------------------------------------- /scripts/run-whisper.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Wrapper that calls the platform-specific whisper script 3 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 4 | 5 | if [ "$(uname -s)" = "Darwin" ]; then 6 | exec "$SCRIPT_DIR/run-whisper-macos.sh" 7 | else 8 | exec "$SCRIPT_DIR/run-whisper-linux.sh" 9 | fi 10 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | update_release_draft: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: release-drafter/release-drafter@v6 13 | env: 14 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 15 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | # nix-direnv file 2 | { pkgs ? 
import {}}: 3 | 4 | pkgs.mkShell { 5 | packages = [ 6 | pkgs.portaudio 7 | pkgs.ffmpeg 8 | pkgs.pkg-config 9 | pkgs.gcc 10 | pkgs.python3 11 | ]; 12 | 13 | shellHook = '' 14 | export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath [ pkgs.portaudio ]}:$LD_LIBRARY_PATH 15 | ''; 16 | } 17 | -------------------------------------------------------------------------------- /agent_cli/agents/__init__.py: -------------------------------------------------------------------------------- 1 | """Agent implementations for the Agent CLI.""" 2 | 3 | from . import ( 4 | assistant, 5 | autocorrect, 6 | chat, 7 | memory, 8 | rag_proxy, 9 | server, 10 | speak, 11 | transcribe, 12 | voice_edit, 13 | ) 14 | 15 | __all__ = [ 16 | "assistant", 17 | "autocorrect", 18 | "chat", 19 | "memory", 20 | "rag_proxy", 21 | "server", 22 | "speak", 23 | "transcribe", 24 | "voice_edit", 25 | ] 26 | -------------------------------------------------------------------------------- /scripts/run-openwakeword.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "👂 Starting Wyoming OpenWakeWord on port 10400..." 3 | 4 | # Use the LiteRT fork until the PR is merged 5 | # PR: https://github.com/rhasspy/wyoming-openwakeword/pull/XXX 6 | # This version works on macOS and other platforms without tflite-runtime 7 | 8 | uvx --python 3.12 --from git+https://github.com/basnijholt/wyoming-openwakeword.git@litert \ 9 | wyoming-openwakeword \ 10 | --uri 'tcp://0.0.0.0:10400' \ 11 | --preload-model 'ok_nabu' 12 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-autocorrect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli autocorrect on macOS 4 | 5 | /opt/homebrew/bin/terminal-notifier -title "📝 Autocorrect" -message "Processing clipboard text..." 
6 | 7 | OUTPUT=$("$HOME/.local/bin/agent-cli" autocorrect --quiet 2>/dev/null) 8 | if [ -n "$OUTPUT" ]; then 9 | /opt/homebrew/bin/terminal-notifier -title "✅ Corrected" -message "$OUTPUT" 10 | else 11 | /opt/homebrew/bin/terminal-notifier -title "❌ Error" -message "No text to correct" 12 | fi 13 | -------------------------------------------------------------------------------- /agent_cli/agents/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory system CLI commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | import typer 6 | 7 | from agent_cli.cli import app 8 | 9 | memory_app = typer.Typer( 10 | name="memory", 11 | help="Memory system operations (add, proxy, etc.).", 12 | rich_markup_mode="markdown", 13 | no_args_is_help=True, 14 | ) 15 | 16 | app.add_typer(memory_app, name="memory") 17 | 18 | # Import subcommands to register them with memory_app 19 | from agent_cli.agents.memory import add, proxy # noqa: E402 20 | 21 | __all__ = ["add", "memory_app", "proxy"] 22 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | environment: 11 | name: pypi 12 | url: https://pypi.org/p/${{ github.repository }} 13 | permissions: 14 | id-token: write 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Install uv 18 | uses: astral-sh/setup-uv@v6 19 | - name: Build 20 | run: uv build 21 | - name: Publish package distributions to PyPI 22 | uses: pypa/gh-action-pypi-publish@release/v1 23 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: check-added-large-files 6 | exclude: uv\.lock 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: mixed-line-ending 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | rev: "v0.14.9" 12 | hooks: 13 | - id: ruff 14 | args: ["--fix"] 15 | - id: ruff-format 16 | - repo: https://github.com/pre-commit/mirrors-mypy 17 | rev: "v1.19.1" 18 | hooks: 19 | - id: mypy 20 | additional_dependencies: ["types-PyYAML"] 21 | -------------------------------------------------------------------------------- /agent_cli/rag/__init__.py: -------------------------------------------------------------------------------- 1 | """RAG module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from agent_cli.core.deps import ensure_optional_dependencies 6 | 7 | _REQUIRED_DEPS = { 8 | "chromadb": "chromadb", 9 | "watchfiles": "watchfiles", 10 | "markitdown": "markitdown", 11 | "fastapi": "fastapi", 12 | "uvicorn": "uvicorn", 13 | "onnxruntime": "onnxruntime", 14 | "huggingface_hub": "huggingface-hub", 15 | "transformers": "transformers", 16 | } 17 | 18 | ensure_optional_dependencies( 19 | _REQUIRED_DEPS, 20 | extra_name="rag", 21 | install_hint="`pip install agent-cli[rag]` or `uv sync --extra rag`", 22 | ) 23 | -------------------------------------------------------------------------------- /agent_cli/constants.py: -------------------------------------------------------------------------------- 1 | """Default configuration settings for the Agent CLI package.""" 2 | 3 | from __future__ import annotations 4 | 5 | # --- Audio Configuration --- 6 | AUDIO_FORMAT_STR = "int16" # sounddevice/numpy format 7 | AUDIO_FORMAT_WIDTH = 2 # 2 bytes (16-bit) 8 | AUDIO_CHANNELS = 1 9 | AUDIO_RATE = 16000 10 | AUDIO_CHUNK_SIZE = 1024 11 | 12 | # Standard Wyoming audio configuration 13 | WYOMING_AUDIO_CONFIG = { 14 | "rate": AUDIO_RATE, 15 | "width": AUDIO_FORMAT_WIDTH, 16 | "channels": AUDIO_CHANNELS, 
17 | } 18 | 19 | # --- HTTP Defaults --- 20 | DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" 21 | DEFAULT_OPENAI_MODEL = "gpt-5-mini" 22 | DEFAULT_OPENAI_EMBEDDING_MODEL = "text-embedding-3-small" 23 | -------------------------------------------------------------------------------- /agent_cli/memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Memory module for long-term chat history.""" 2 | 3 | from __future__ import annotations 4 | 5 | from agent_cli.core.deps import ensure_optional_dependencies 6 | 7 | _REQUIRED_DEPS = { 8 | "chromadb": "chromadb", 9 | "fastapi": "fastapi", 10 | "uvicorn": "uvicorn", 11 | "onnxruntime": "onnxruntime", 12 | "huggingface_hub": "huggingface-hub", 13 | "transformers": "transformers", 14 | } 15 | 16 | ensure_optional_dependencies( 17 | _REQUIRED_DEPS, 18 | extra_name="memory", 19 | install_hint="`pip install agent-cli[memory]` or `uv sync --extra memory`", 20 | ) 21 | 22 | from agent_cli.memory.client import MemoryClient # noqa: E402 23 | 24 | __all__ = ["MemoryClient"] 25 | -------------------------------------------------------------------------------- /scripts/nvidia-asr-server/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "nvidia-asr-server" 3 | version = "1.0.0" 4 | description = "NVIDIA ASR server with OpenAI-compatible API" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "fastapi[standard]>=0.115.0", 9 | "torch>=2.5.0", 10 | "soundfile>=0.12.1", 11 | "sacrebleu>=2.4.0", 12 | "typer>=0.9.0", 13 | "nemo-toolkit[asr,tts] @ git+https://github.com/NVIDIA/NeMo.git", 14 | ] 15 | 16 | [tool.uv.sources] 17 | torch = [{ index = "pytorch-cu124" }] 18 | 19 | [[tool.uv.index]] 20 | name = "pytorch-cu124" 21 | url = "https://download.pytorch.org/whl/cu124" 22 | explicit = true 23 | 24 | [tool.uv] 25 | override-dependencies = [ 26 | "ml-dtypes>=0.5.0", 27 | ] 28 | 
-------------------------------------------------------------------------------- /agent_cli/core/deps.py: -------------------------------------------------------------------------------- 1 | """Helpers for optional dependency checks.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.util import find_spec 6 | 7 | 8 | def ensure_optional_dependencies( 9 | required: dict[str, str], 10 | *, 11 | extra_name: str, 12 | install_hint: str | None = None, 13 | ) -> None: 14 | """Ensure optional dependencies are present, otherwise raise ImportError.""" 15 | missing = [ 16 | pkg_name for module_name, pkg_name in required.items() if find_spec(module_name) is None 17 | ] 18 | if not missing: 19 | return 20 | 21 | hint = install_hint or f"`pip install agent-cli[{extra_name}]`" 22 | msg = f"Missing required dependencies for {extra_name}: {', '.join(missing)}. Please install with {hint}." 23 | raise ImportError(msg) 24 | -------------------------------------------------------------------------------- /.github/workflows/automerge.yml: -------------------------------------------------------------------------------- 1 | name: Auto Merge 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, labeled, unlabeled] 6 | 7 | jobs: 8 | auto-merge: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Label PR 12 | if: github.event.pull_request.title == '[pre-commit.ci] pre-commit autoupdate' 13 | run: | 14 | curl -X POST \ 15 | -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ 16 | -H "Accept: application/vnd.github.v3+json" \ 17 | https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels \ 18 | -d '{"labels":["automerge"]}' 19 | - name: Automatic Merge 20 | uses: pascalgn/automerge-action@v0.16.4 21 | env: 22 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 23 | -------------------------------------------------------------------------------- /.github/renovate.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "rebaseWhen": "behind-base-branch", 4 | "dependencyDashboard": true, 5 | "labels": [ 6 | "dependencies", 7 | "no-stale" 8 | ], 9 | "commitMessagePrefix": "⬆️", 10 | "commitMessageTopic": "{{depName}}", 11 | "prBodyDefinitions": { 12 | "Release": "yes" 13 | }, 14 | "packageRules": [ 15 | { 16 | "matchManagers": [ 17 | "github-actions" 18 | ], 19 | "addLabels": [ 20 | "github_actions" 21 | ], 22 | "rangeStrategy": "pin" 23 | }, 24 | { 25 | "matchManagers": [ 26 | "github-actions" 27 | ], 28 | "matchUpdateTypes": [ 29 | "minor", 30 | "patch" 31 | ], 32 | "automerge": true 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /scripts/run-piper.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "🔊 Starting Wyoming Piper on port 10200..." 3 | 4 | # Create .runtime directory for local assets 5 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 6 | mkdir -p "$SCRIPT_DIR/.runtime" 7 | 8 | # Download voice if not present using uvx 9 | if [ ! -d "$SCRIPT_DIR/.runtime/piper-data/en_US-lessac-medium" ]; then 10 | echo "⬇️ Downloading voice model..." 
11 | mkdir -p "$SCRIPT_DIR/.runtime/piper-data" 12 | cd "$SCRIPT_DIR/.runtime/piper-data" 13 | uvx --python 3.12 --from piper-tts python -m piper.download_voices en_US-lessac-medium 14 | cd "$SCRIPT_DIR" 15 | fi 16 | 17 | # Run Wyoming Piper using uvx wrapper 18 | uvx --python 3.12 \ 19 | --from git+https://github.com/rhasspy/wyoming-piper.git@v2.1.1 \ 20 | wyoming-piper \ 21 | --voice en_US-lessac-medium \ 22 | --uri 'tcp://0.0.0.0:10200' \ 23 | --data-dir "$SCRIPT_DIR/.runtime/piper-data" \ 24 | --download-dir "$SCRIPT_DIR/.runtime/piper-data" 25 | -------------------------------------------------------------------------------- /agent_cli/rag/_prompt.py: -------------------------------------------------------------------------------- 1 | """Centralized prompts for RAG LLM calls.""" 2 | 3 | RAG_PROMPT_WITH_TOOLS = """ 4 | ## Retrieved Documentation 5 | The following was automatically retrieved based on the user's query: 6 | 7 | 8 | {context} 9 | 10 | 11 | ## RAG Instructions 12 | - Use the retrieved context ONLY if it's relevant to the question 13 | - If the context is irrelevant, ignore it and answer based on your knowledge 14 | - When using context, cite sources: [Source: filename] 15 | - If snippets are insufficient, call read_full_document(file_path) to get full content 16 | """.strip() 17 | 18 | RAG_PROMPT_NO_TOOLS = """ 19 | ## Retrieved Documentation 20 | The following was automatically retrieved based on the user's query: 21 | 22 | 23 | {context} 24 | 25 | 26 | ## RAG Instructions 27 | - Use the retrieved context ONLY if it's relevant to the question 28 | - If the context is irrelevant, ignore it and answer based on your knowledge 29 | - When using context, cite sources: [Source: filename] 30 | """.strip() 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | 4 | # Python 5 | __pycache__/ 6 | 
*.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Audio files 27 | *.wav 28 | *.mp3 29 | 30 | # Virtual Environment 31 | venv/ 32 | env/ 33 | .direnv/ 34 | ENV/ 35 | 36 | # IDE 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | 42 | # Worktrees directory 43 | worktrees/ 44 | 45 | # Submodule 46 | cli-submodule/ 47 | 48 | # other 49 | .coverage* 50 | coverage.* 51 | .envrc 52 | .ruff_cache/ 53 | .mypy_cache/ 54 | .pytest_cache/ 55 | .vscode/ 56 | .vscode/ 57 | 58 | # Examples and scripts - exclude downloaded models and data 59 | examples/ollama/models/ 60 | examples/piper-data/ 61 | examples/whisper-data/ 62 | examples/ollama/id_ed25519* 63 | scripts/.runtime/ 64 | *.onnx 65 | *.onnx.json 66 | *.bin 67 | 68 | # RAG/memory 69 | rag_proxy_*.log 70 | transcription_log.json 71 | temp_rag_*/ 72 | memory_db 73 | rag_db/ 74 | *.log 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Bas Nijholt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/nvidia-asr-server/shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? import { config.allowUnfree = true; } }: 2 | 3 | pkgs.mkShell { 4 | buildInputs = with pkgs; [ 5 | # Python and uv 6 | python313 7 | uv 8 | 9 | # Audio libraries 10 | ffmpeg 11 | ]; 12 | 13 | shellHook = '' 14 | # Set up CUDA environment (use system NVIDIA drivers and CUDA libraries) 15 | export LD_LIBRARY_PATH=/run/opengl-driver/lib:/run/current-system/sw/lib:$LD_LIBRARY_PATH 16 | 17 | # Tell triton where to find libcuda.so (avoids calling /sbin/ldconfig) 18 | export TRITON_LIBCUDA_PATH=/run/opengl-driver/lib 19 | 20 | # PyTorch memory management - avoid fragmentation 21 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True 22 | 23 | # Canary server defaults 24 | export CANARY_PORT=9898 25 | # CANARY_DEVICE auto-detects GPU with most free memory (override if needed) 26 | 27 | echo "CUDA environment configured (using system NVIDIA drivers)" 28 | echo "TRITON_LIBCUDA_PATH: $TRITON_LIBCUDA_PATH" 29 | echo "PYTORCH_CUDA_ALLOC_CONF: $PYTORCH_CUDA_ALLOC_CONF" 30 | echo "Run 'uv run server.py' to start the server" 31 | ''; 32 | } 33 | -------------------------------------------------------------------------------- /.cursorrules: -------------------------------------------------------------------------------- 1 | # Development Rules 2 | 3 | ## Core Rules 4 | - 
Always start by exploring the context of this package: list the existing files and read the full `README.md`. 5 | - Always use `uv sync --all-extras` to install packages then activate the virtual environment with `source .venv/bin/activate` 6 | - Commit frequently but always make sure tests pass first, using `pytest` 7 | - When working on a feature, check out `git diff origin/main | cat`. Make sure to use --no-pager, or pipe the output to `cat`. 8 | - Prefer functional style Python instead of classes with inheritance 9 | - Keep it DRY - reuse code as much as possible 10 | - Always run pre-commit hooks before committing 11 | - Implement the simplest solution possible and don't generalize when not yet needed 12 | - Only implement the feature that is asked for, not anything extra 13 | - NEVER run `git add .` - only use `git add filename` to avoid committing unrelated files 14 | - NEVER claim that you are done with a task without running pytest. 15 | - The linter might flag issues in pyproject.toml but ignore these because it is incorrect! 16 | - DO NOT MANUALLY update the CLI help messages in the `README.md`. These are automatically generated. 
17 | -------------------------------------------------------------------------------- /tests/core/test_chroma.py: -------------------------------------------------------------------------------- 1 | """Tests for core Chroma helpers.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import BaseModel 8 | 9 | from agent_cli.core import chroma 10 | 11 | 12 | class _Meta(BaseModel): 13 | source: str 14 | tags: list[str] 15 | score: float | None = None 16 | 17 | 18 | class _FakeCollection: 19 | def __init__(self) -> None: 20 | self.calls: list[tuple[list[str], list[str], list[dict[str, Any]]]] = [] 21 | 22 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 23 | self.calls.append((ids, documents, metadatas)) 24 | 25 | 26 | def test_flatten_and_upsert_uses_base_models() -> None: 27 | """Ensure metadata serialization accepts BaseModel and preserves lists.""" 28 | m = _Meta(source="doc", tags=["a", "b"]) 29 | collection = _FakeCollection() 30 | 31 | chroma.upsert(collection, ids=["1"], documents=["text"], metadatas=[m]) 32 | 33 | assert collection.calls 34 | ids, docs, metas = collection.calls[0] 35 | assert ids == ["1"] 36 | assert docs == ["text"] 37 | assert metas == [{"source": "doc", "tags": ["a", "b"]}] 38 | -------------------------------------------------------------------------------- /tests/rag/test_store.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG store.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import MagicMock, patch 5 | 6 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 7 | from agent_cli.core import chroma 8 | from agent_cli.rag import _store 9 | 10 | 11 | def test_init_collection(tmp_path: Path) -> None: 12 | """Test collection initialization.""" 13 | with ( 14 | patch("chromadb.PersistentClient") as mock_client, 15 | 
patch("agent_cli.core.chroma.embedding_functions.OpenAIEmbeddingFunction") as mock_openai, 16 | ): 17 | chroma.init_collection( 18 | tmp_path, 19 | name="docs", 20 | embedding_model=DEFAULT_OPENAI_EMBEDDING_MODEL, 21 | ) 22 | 23 | mock_client.assert_called_once() 24 | mock_openai.assert_called_once() 25 | mock_client.return_value.get_or_create_collection.assert_called_once() 26 | 27 | 28 | def test_delete_by_file_path() -> None: 29 | """Test deleting by file path.""" 30 | mock_collection = MagicMock() 31 | _store.delete_by_file_path(mock_collection, "path/to/file") 32 | mock_collection.delete.assert_called_with(where={"file_path": "path/to/file"}) 33 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: pytest 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | env: 9 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest, macos-latest, windows-latest] 18 | python-version: ["3.11", "3.13"] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install portaudio (Linux only) 27 | if: matrix.os == 'ubuntu-latest' 28 | run: sudo apt-get update && sudo apt-get install -y portaudio19-dev 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v6 31 | - name: Run pytest 32 | run: uv run --all-extras pytest -vvv 33 | - name: Upload coverage reports to Codecov 34 | if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' 35 | uses: codecov/codecov-action@v5 36 | env: 37 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 38 | -------------------------------------------------------------------------------- /tests/test_env_vars.py: 
-------------------------------------------------------------------------------- 1 | """Test that environment variables are correctly mapped to CLI options.""" 2 | 3 | import os 4 | import re 5 | from unittest import mock 6 | 7 | from typer.testing import CliRunner 8 | 9 | from agent_cli.cli import app 10 | 11 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 12 | 13 | 14 | def test_openai_base_url_env_var() -> None: 15 | """Test that OPENAI_BASE_URL environment variable sets the openai_base_url option.""" 16 | env_vars = {"OPENAI_BASE_URL": "http://test"} 17 | 18 | with ( 19 | mock.patch.dict(os.environ, env_vars), 20 | mock.patch("agent_cli.agents.autocorrect._async_autocorrect"), 21 | ): 22 | # We use --print-args to see what the CLI parsed. 23 | # We need to provide a dummy text argument so it doesn't try to read clipboard if it's empty/fails. 24 | result = runner.invoke(app, ["autocorrect", "--print-args", "dummy text"]) 25 | 26 | assert result.exit_code == 0 27 | # Strip ANSI codes 28 | clean_output = re.sub(r"\x1b\[[0-9;]*m", "", result.stdout) 29 | 30 | # Check if openai_base_url matches the env var 31 | assert "openai_base_url" in clean_output 32 | assert "http://test" in clean_output 33 | -------------------------------------------------------------------------------- /tests/test_llm_gemini.py: -------------------------------------------------------------------------------- 1 | """Tests for the Gemini LLM provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | import pytest 6 | 7 | from agent_cli import config 8 | from agent_cli.constants import DEFAULT_OPENAI_MODEL 9 | from agent_cli.services.llm import create_llm_agent 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_create_llm_agent_with_gemini() -> None: 14 | """Test that the create_llm_agent function can build an agent with the Gemini provider.""" 15 | provider_cfg = config.ProviderSelection( 16 | llm_provider="gemini", 17 | asr_provider="wyoming", 18 | 
tts_provider="wyoming", 19 | ) 20 | gemini_cfg = config.GeminiLLM( 21 | llm_gemini_model="gemini-1.5-flash", 22 | gemini_api_key="test-key", 23 | ) 24 | ollama_cfg = config.Ollama( 25 | llm_ollama_model="gemma3:4b", 26 | llm_ollama_host="http://localhost:11434", 27 | ) 28 | openai_cfg = config.OpenAILLM( 29 | llm_openai_model=DEFAULT_OPENAI_MODEL, 30 | openai_api_key="test-key", 31 | ) 32 | 33 | agent = create_llm_agent( 34 | provider_cfg=provider_cfg, 35 | ollama_cfg=ollama_cfg, 36 | openai_cfg=openai_cfg, 37 | gemini_cfg=gemini_cfg, 38 | ) 39 | assert agent is not None 40 | -------------------------------------------------------------------------------- /agent_cli/rag/models.py: -------------------------------------------------------------------------------- 1 | """RAG data models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class Message(BaseModel): 9 | """Chat message model.""" 10 | 11 | role: str 12 | content: str 13 | 14 | 15 | class ChatRequest(BaseModel): 16 | """Chat completion request model.""" 17 | 18 | model_config = ConfigDict(extra="allow") 19 | 20 | model: str 21 | messages: list[Message] 22 | temperature: float | None = 0.7 23 | max_tokens: int | None = 1000 24 | stream: bool | None = False 25 | rag_top_k: int | None = None 26 | rag_enable_tools: bool | None = True 27 | 28 | 29 | class DocMetadata(BaseModel): 30 | """Metadata for an indexed document chunk.""" 31 | 32 | source: str 33 | file_path: str 34 | file_type: str 35 | chunk_id: int 36 | total_chunks: int 37 | indexed_at: str 38 | file_hash: str 39 | file_mtime: float 40 | 41 | 42 | class RagSource(BaseModel): 43 | """Source information for RAG response.""" 44 | 45 | source: str 46 | path: str 47 | chunk_id: int 48 | score: float 49 | 50 | 51 | class RetrievalResult(BaseModel): 52 | """Result of a RAG retrieval operation.""" 53 | 54 | context: str 55 | sources: list[RagSource] 56 | 
-------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | """Tests for the tools.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import patch 7 | 8 | from agent_cli._tools import execute_code, read_file 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | 14 | def test_read_file_tool(tmp_path: Path) -> None: 15 | """Test the ReadFileTool.""" 16 | # 1. Test reading a file that exists 17 | file = tmp_path / "test.txt" 18 | file.write_text("hello") 19 | assert read_file(path=str(file)) == "hello" 20 | 21 | # 2. Test reading a file that does not exist 22 | assert "Error: File not found" in read_file(path="non_existent_file.txt") 23 | 24 | # 3. Test OSError 25 | with patch("pathlib.Path.read_text", side_effect=OSError("Test error")): 26 | assert "Error reading file" in read_file(path=str(file)) 27 | 28 | 29 | def test_execute_code_tool() -> None: 30 | """Test the ExecuteCodeTool.""" 31 | # 1. Test a simple command 32 | assert execute_code(code="echo hello").strip() == "hello" 33 | 34 | # 2. Test a command that fails 35 | assert "Error: Command not found" in execute_code(code="non_existent_command") 36 | 37 | # 3. 
Test a command that returns a non-zero exit code 38 | assert "Error executing code" in execute_code(code="ls non_existent_file") 39 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-transcription.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli transcription on macOS 4 | 5 | NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier} 6 | RECORDING_GROUP="agent-cli-transcribe-recording" 7 | TEMP_PREFIX="agent-cli-transcribe-temp" 8 | 9 | notify_temp() { 10 | local title=$1 11 | local message=$2 12 | local duration=${3:-4} # 4 seconds default 13 | local group="${TEMP_PREFIX}-${RANDOM}-$$" 14 | 15 | "$NOTIFIER" -title "$title" -message "$message" -group "$group" 16 | ( 17 | sleep "$duration" 18 | "$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true 19 | ) & 20 | } 21 | 22 | if pgrep -f "agent-cli transcribe" > /dev/null; then 23 | pkill -INT -f "agent-cli transcribe" 24 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 25 | notify_temp "🛑 Stopped" "Processing results..." 26 | else 27 | "$NOTIFIER" -title "🎙️ Started" -message "Listening..." -group "$RECORDING_GROUP" 28 | ( 29 | OUTPUT=$("$HOME/.local/bin/agent-cli" transcribe --llm --quiet 2>/dev/null) 30 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 31 | if [ -n "$OUTPUT" ]; then 32 | notify_temp "📄 Result" "$OUTPUT" 33 | else 34 | notify_temp "❌ Error" "No output" 35 | fi 36 | ) & 37 | fi 38 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/README.md: -------------------------------------------------------------------------------- 1 | # macOS Hotkeys 2 | 3 | System-wide hotkeys for agent-cli voice AI features on macOS. 
4 | 5 | ## Setup 6 | 7 | ```bash 8 | ./setup-macos-hotkeys.sh 9 | ``` 10 | 11 | ## Usage 12 | 13 | - **`Cmd+Shift+R`** → Toggle voice transcription (start/stop with result) 14 | - **`Cmd+Shift+A`** → Autocorrect clipboard text 15 | - **`Cmd+Shift+V`** → Toggle voice edit mode for clipboard 16 | 17 | Results appear in notifications and clipboard. 18 | 19 | > **Tip:** For a persistent “Listening…” indicator, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions). 20 | > Also enable "Allow notification when mirroring or sharing the display". 21 | > The scripts keep that alert pinned while dismissing status/result notifications automatically. 22 | 23 | ## What it installs 24 | 25 | - **skhd**: Hotkey manager 26 | - **terminal-notifier**: Notifications 27 | - **Configuration**: Automatic setup 28 | 29 | ## Troubleshooting 30 | 31 | **Hotkey not working?** 32 | - Grant accessibility permissions in System Settings 33 | 34 | **No notifications?** 35 | ```bash 36 | terminal-notifier -title "Test" -message "Hello" 37 | ``` 38 | 39 | **Services not running?** 40 | ```bash 41 | ./start-all-services.sh 42 | ``` 43 | 44 | That's it! System-wide hotkeys for agent-cli on macOS. 
45 | -------------------------------------------------------------------------------- /scripts/macos-hotkeys/toggle-voice-edit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli voice-edit on macOS 4 | 5 | NOTIFIER=${NOTIFIER:-/opt/homebrew/bin/terminal-notifier} 6 | RECORDING_GROUP="agent-cli-voice-edit-recording" 7 | TEMP_PREFIX="agent-cli-voice-edit-temp" 8 | 9 | notify_temp() { 10 | local title=$1 11 | local message=$2 12 | local duration=${3:-4} # 4 seconds default 13 | local group="${TEMP_PREFIX}-${RANDOM}-$$" 14 | 15 | "$NOTIFIER" -title "$title" -message "$message" -group "$group" 16 | ( 17 | sleep "$duration" 18 | "$NOTIFIER" -remove "$group" >/dev/null 2>&1 || true 19 | ) & 20 | } 21 | 22 | if pgrep -f "agent-cli voice-edit" > /dev/null; then 23 | pkill -INT -f "agent-cli voice-edit" 24 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 25 | notify_temp "🛑 Stopped" "Processing voice command..." 26 | else 27 | "$NOTIFIER" -title "🎙️ Started" -message "Listening for voice command..." -group "$RECORDING_GROUP" 28 | ( 29 | OUTPUT=$("$HOME/.local/bin/agent-cli" voice-edit --quiet 2>/dev/null) 30 | "$NOTIFIER" -remove "$RECORDING_GROUP" >/dev/null 2>&1 || true 31 | if [ -n "$OUTPUT" ]; then 32 | notify_temp "✨ Voice Edit Result" "$OUTPUT" 33 | else 34 | notify_temp "❌ Error" "No output" 35 | fi 36 | ) & 37 | fi 38 | -------------------------------------------------------------------------------- /scripts/run-whisper-linux.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Linux: faster-whisper with CUDA/CPU detection 3 | echo "🎤 Starting Wyoming Faster Whisper on port 10300..." 
4 | 5 | # Detect if CUDA is available 6 | if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then 7 | echo "⚡ NVIDIA GPU detected" 8 | DETECTED_DEVICE="cuda" 9 | else 10 | echo "💻 No GPU detected or CUDA unavailable" 11 | DETECTED_DEVICE="cpu" 12 | fi 13 | 14 | # Allow device override via environment variable 15 | DEVICE="${WHISPER_DEVICE:-$DETECTED_DEVICE}" 16 | 17 | # Set default model based on final device choice 18 | if [ "$DEVICE" = "cuda" ]; then 19 | DEFAULT_MODEL="large-v3" 20 | else 21 | DEFAULT_MODEL="tiny" 22 | fi 23 | 24 | # Allow model override via environment variable 25 | MODEL="${WHISPER_MODEL:-$DEFAULT_MODEL}" 26 | echo "📦 Using model: $MODEL on device: $DEVICE" 27 | 28 | # Create .runtime directory for whisper data 29 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" 30 | mkdir -p "$SCRIPT_DIR/.runtime" 31 | 32 | uvx --python 3.12 \ 33 | --from git+https://github.com/rhasspy/wyoming-faster-whisper.git@v3.0.1 \ 34 | wyoming-faster-whisper \ 35 | --model "$MODEL" \ 36 | --language en \ 37 | --device "$DEVICE" \ 38 | --uri 'tcp://0.0.0.0:10300' \ 39 | --data-dir "$SCRIPT_DIR/.runtime/whisper-data" \ 40 | --download-dir "$SCRIPT_DIR/.runtime/whisper-data" 41 | -------------------------------------------------------------------------------- /agent_cli/memory/entities.py: -------------------------------------------------------------------------------- 1 | """Domain entities for the memory system. 2 | 3 | These models represent the "Truth" of the system with strict validation. 4 | Unlike the storage models (files/DB), these entities do not have optional fields 5 | where they shouldn't. 
6 | """ 7 | 8 | from __future__ import annotations 9 | 10 | from datetime import datetime # noqa: TC003 11 | from typing import Literal 12 | 13 | from pydantic import BaseModel, Field 14 | 15 | 16 | class Turn(BaseModel): 17 | """A single user or assistant message in the conversation.""" 18 | 19 | id: str = Field(..., description="Unique UUID for this turn") 20 | conversation_id: str 21 | role: Literal["user", "assistant"] 22 | content: str 23 | created_at: datetime 24 | 25 | 26 | class Fact(BaseModel): 27 | """An atomic piece of information extracted from a user message.""" 28 | 29 | id: str = Field(..., description="Unique UUID for this fact") 30 | conversation_id: str 31 | content: str 32 | source_id: str = Field(..., description="UUID of the Turn this fact was extracted from") 33 | created_at: datetime 34 | # Facts are always role="memory" implicitly in the storage layer 35 | 36 | 37 | class Summary(BaseModel): 38 | """The rolling summary of a conversation.""" 39 | 40 | conversation_id: str 41 | content: str 42 | created_at: datetime 43 | # Summaries are role="summary" implicitly 44 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-autocorrect.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli autocorrect on Linux 4 | # 5 | # This script corrects text from clipboard using AI: 6 | # - Reads text from clipboard 7 | # - Processes it with LLM for grammar/spelling corrections 8 | # - Displays the corrected result 9 | # 10 | # Works across different Linux desktop environments 11 | 12 | # Function to send notification 13 | notify() { 14 | local title="$1" 15 | local message="$2" 16 | local timeout="${3:-3000}" 17 | 18 | if command -v notify-send &> /dev/null; then 19 | notify-send -t "$timeout" "$title" "$message" 20 | elif command -v dunstify &> /dev/null; then 21 | dunstify -t "$timeout" "$title" "$message" 22 
| else 23 | echo "$title: $message" 24 | fi 25 | } 26 | 27 | # Function to sync clipboard (Wayland) 28 | sync_clipboard() { 29 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 30 | wl-paste | wl-copy -p 2>/dev/null || true 31 | fi 32 | } 33 | 34 | # Ensure agent-cli is in PATH 35 | export PATH="$PATH:$HOME/.local/bin" 36 | 37 | notify "📝 Autocorrect" "Processing clipboard text..." 38 | 39 | OUTPUT=$(agent-cli autocorrect --quiet 2>/dev/null) && { 40 | # Sync clipboard to primary selection (Wayland) 41 | sync_clipboard 42 | notify "✅ Corrected" "$OUTPUT" 5000 43 | } || { 44 | notify "❌ Error" "No text to correct or processing failed" 3000 45 | } 46 | -------------------------------------------------------------------------------- /agent_cli/memory/_streaming.py: -------------------------------------------------------------------------------- 1 | """Streaming helpers for chat completions.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import httpx 8 | 9 | from agent_cli.core.sse import extract_content_from_chunk, parse_chunk 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import AsyncGenerator 13 | 14 | 15 | async def stream_chat_sse( 16 | *, 17 | openai_base_url: str, 18 | payload: dict[str, Any], 19 | headers: dict[str, str] | None = None, 20 | request_timeout: float = 120.0, 21 | ) -> AsyncGenerator[str, None]: 22 | """Stream Server-Sent Events from an OpenAI-compatible chat completion endpoint.""" 23 | url = f"{openai_base_url.rstrip('/')}/chat/completions" 24 | async with ( 25 | httpx.AsyncClient(timeout=request_timeout) as client, 26 | client.stream("POST", url, json=payload, headers=headers) as response, 27 | ): 28 | if response.status_code != 200: # noqa: PLR2004 29 | error_text = await response.aread() 30 | yield f"data: {error_text.decode(errors='ignore')}\n\n" 31 | return 32 | async for line in response.aiter_lines(): 33 | if line: 34 | yield line 35 | 36 | 37 | def 
accumulate_assistant_text(line: str, buffer: list[str]) -> None: 38 | """Parse SSE line and append any assistant text delta into buffer.""" 39 | chunk = parse_chunk(line) 40 | if chunk is None: 41 | return 42 | piece = extract_content_from_chunk(chunk) 43 | if piece: 44 | buffer.append(piece) 45 | -------------------------------------------------------------------------------- /agent_cli/rag/_store.py: -------------------------------------------------------------------------------- 1 | """ChromaDB functional interface.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from agent_cli.core.chroma import delete_where, upsert 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | 13 | from chromadb import Collection 14 | 15 | from agent_cli.rag.models import DocMetadata 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | def upsert_docs( 21 | collection: Collection, 22 | ids: list[str], 23 | documents: list[str], 24 | metadatas: Sequence[DocMetadata], 25 | ) -> None: 26 | """Upsert documents into the collection.""" 27 | upsert(collection, ids=ids, documents=documents, metadatas=metadatas) 28 | 29 | 30 | def delete_by_file_path(collection: Collection, file_path: str) -> None: 31 | """Delete all chunks associated with a file path.""" 32 | delete_where(collection, {"file_path": file_path}) 33 | 34 | 35 | def query_docs(collection: Collection, text: str, n_results: int) -> dict[str, Any]: 36 | """Query the collection.""" 37 | return collection.query(query_texts=[text], n_results=n_results) 38 | 39 | 40 | def get_all_metadata(collection: Collection) -> list[dict[str, Any]]: 41 | """Retrieve all metadata from the collection.""" 42 | result = collection.get(include=["metadatas"]) 43 | return result.get("metadatas", []) or [] # type: ignore[return-value] 44 | 45 | 46 | def count_docs(collection: Collection) -> int: 47 | """Return total number of documents.""" 48 | return 
collection.count() 49 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | ollama: 3 | build: 4 | context: .. 5 | dockerfile: docker/Dockerfile 6 | ports: 7 | - "11434:11434" 8 | environment: 9 | - OLLAMA_THINK=false 10 | volumes: 11 | - ./ollama:/root/.ollama 12 | # On macOS, Docker does not support GPU acceleration. For better 13 | # performance, it is recommended to install Ollama natively: 14 | # https://ollama.com/download 15 | # 16 | # On Linux with an NVIDIA GPU, you can uncomment the following 17 | # lines to enable GPU acceleration. 18 | # deploy: 19 | # resources: 20 | # reservations: 21 | # devices: 22 | # - driver: nvidia 23 | # count: all 24 | # capabilities: [gpu] 25 | 26 | piper: 27 | image: rhasspy/wyoming-piper 28 | ports: 29 | - "10200:10200" 30 | volumes: 31 | - ./piper-data:/data 32 | command: --voice en-us-ryan-high 33 | 34 | whisper: 35 | image: rhasspy/wyoming-whisper 36 | ports: 37 | - "10300:10300" 38 | volumes: 39 | - ./whisper-data:/data 40 | command: --model large-v3 --language en 41 | # The official rhasspy/wyoming-whisper image does not currently 42 | # support GPU acceleration. 
43 | # see https://github.com/rhasspy/wyoming-faster-whisper/issues/35 44 | # and https://github.com/rhasspy/wyoming-faster-whisper/pull/44 45 | 46 | openwakeword: 47 | image: rhasspy/wyoming-openwakeword 48 | ports: 49 | - "10400:10400" 50 | volumes: 51 | - ./openwakeword-data:/data 52 | command: --preload-model ok_nabu --custom-model-dir /data 53 | -------------------------------------------------------------------------------- /tests/mocks/llm.py: -------------------------------------------------------------------------------- 1 | """Mock LLM agents and responses for testing.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | if TYPE_CHECKING: 8 | from collections.abc import Awaitable 9 | 10 | 11 | class MockLLMResult: 12 | """Mock result from LLM agent execution.""" 13 | 14 | def __init__(self, output: str) -> None: 15 | """Initialize mock result.""" 16 | self.output = output 17 | 18 | 19 | class MockLLMAgent: 20 | """Mock LLM agent for testing without real API calls.""" 21 | 22 | def __init__(self, responses: dict[str, str]) -> None: 23 | """Initialize mock agent. 
24 | 25 | Args: 26 | ---- 27 | responses: Mapping of input patterns to responses 28 | 29 | """ 30 | self.responses = responses 31 | self.call_history: list[dict[str, Any]] = [] 32 | 33 | def run(self, user_prompt: str) -> Awaitable[MockLLMResult]: 34 | """Mock execution of the agent.""" 35 | self.call_history.append({"user_prompt": user_prompt}) 36 | 37 | async def mock_run() -> MockLLMResult: 38 | response = self._get_response_for_prompt(user_prompt) 39 | return MockLLMResult(response) 40 | 41 | return mock_run() 42 | 43 | def _get_response_for_prompt(self, prompt: str) -> str: 44 | """Get appropriate response for the given prompt.""" 45 | prompt_lower = prompt.lower() 46 | for pattern, response in self.responses.items(): 47 | if pattern.lower() in prompt_lower: 48 | return response 49 | return self.responses.get("default", "Mock LLM response") 50 | -------------------------------------------------------------------------------- /agent_cli/memory/_tasks.py: -------------------------------------------------------------------------------- 1 | """Utilities for tracking background tasks in the memory proxy.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | import logging 7 | from typing import TYPE_CHECKING, Any 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Coroutine 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | _BACKGROUND_TASKS: set[asyncio.Task[Any]] = set() 15 | 16 | 17 | def _track_background(task: asyncio.Task[Any], label: str) -> asyncio.Task[Any]: 18 | """Track background tasks and surface failures.""" 19 | _BACKGROUND_TASKS.add(task) 20 | 21 | def _done_callback(done: asyncio.Task[Any]) -> None: 22 | _BACKGROUND_TASKS.discard(done) 23 | if done.cancelled(): 24 | LOGGER.debug("Background task %s cancelled", label) 25 | return 26 | exc = done.exception() 27 | if exc: 28 | LOGGER.exception("Background task %s failed", label, exc_info=exc) 29 | 30 | task.add_done_callback(_done_callback) 31 | return task 32 | 33 | 34 
| def run_in_background( 35 | coro: asyncio.Task[Any] | Coroutine[Any, Any, Any], 36 | label: str, 37 | ) -> asyncio.Task[Any]: 38 | """Create and track a background asyncio task.""" 39 | task = coro if isinstance(coro, asyncio.Task) else asyncio.create_task(coro) 40 | task.set_name(f"memory-{label}") 41 | return _track_background(task, label) 42 | 43 | 44 | async def wait_for_background_tasks() -> None: 45 | """Await any in-flight background tasks (useful in tests).""" 46 | while _BACKGROUND_TASKS: 47 | tasks = list(_BACKGROUND_TASKS) 48 | await asyncio.gather(*tasks, return_exceptions=False) 49 | -------------------------------------------------------------------------------- /.github/workflows/update-readme.yml: -------------------------------------------------------------------------------- 1 | name: Update README.md 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | update_readme: 11 | runs-on: macos-latest 12 | steps: 13 | - name: Check out repository 14 | uses: actions/checkout@v4 15 | with: 16 | persist-credentials: false 17 | fetch-depth: 0 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v5 21 | 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v6 24 | 25 | - name: Run markdown-code-runner 26 | env: 27 | TERM: dumb 28 | NO_COLOR: 1 29 | TERMINAL_WIDTH: 90 30 | run: | 31 | uvx --with . markdown-code-runner README.md 32 | sed -i '' 's/[[:space:]]*$//' README.md 33 | 34 | - name: Commit updated README.md 35 | id: commit 36 | run: | 37 | git add README.md 38 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 39 | git config --local user.name "github-actions[bot]" 40 | if git diff --quiet && git diff --staged --quiet; then 41 | echo "No changes in README.md, skipping commit." 
42 | echo "commit_status=skipped" >> $GITHUB_ENV 43 | else 44 | git commit -m "Update README.md" 45 | echo "commit_status=committed" >> $GITHUB_ENV 46 | fi 47 | 48 | - name: Push changes 49 | if: env.commit_status == 'committed' 50 | uses: ad-m/github-push-action@master 51 | with: 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | branch: ${{ github.head_ref }} 54 | -------------------------------------------------------------------------------- /tests/core/test_watch.py: -------------------------------------------------------------------------------- 1 | """Tests for shared watch helper.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import pytest 8 | from watchfiles import Change 9 | 10 | from agent_cli.core import watch as watch_mod 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_watch_directory_skips_hidden(tmp_path: Path) -> None: 18 | """Only non-hidden files trigger the handler.""" 19 | called: list[tuple[Change, Path]] = [] 20 | 21 | async def fake_awatch(_root: Path) -> Any: # type: ignore[override] 22 | yield { 23 | (Change.added, str(tmp_path / "visible.txt")), 24 | (Change.added, str(tmp_path / ".hidden.txt")), 25 | (Change.added, str(tmp_path / "sub/.nested")), 26 | (Change.deleted, str(tmp_path / "gone.txt")), 27 | } 28 | 29 | def handler(change: Change, path: Path) -> None: 30 | called.append((change, path)) 31 | 32 | tmp_path.mkdir(parents=True, exist_ok=True) 33 | (tmp_path / "visible.txt").touch() 34 | (tmp_path / "gone.txt").touch() 35 | (tmp_path / "sub").mkdir() 36 | 37 | # Patch awatch used inside watch_directory 38 | original = watch_mod.awatch 39 | watch_mod.awatch = fake_awatch # type: ignore[assignment] 40 | try: 41 | await watch_mod.watch_directory(tmp_path, handler) 42 | finally: 43 | watch_mod.awatch = original # type: ignore[assignment] 44 | 45 | seen_paths = {p.name for _, p in called} 46 | assert "visible.txt" in 
seen_paths 47 | assert "gone.txt" in seen_paths 48 | assert ".hidden.txt" not in seen_paths 49 | assert ".nested" not in seen_paths 50 | -------------------------------------------------------------------------------- /scripts/zellij_help.txt: -------------------------------------------------------------------------------- 1 | ╔═══════════════════════════════════════════════════════════════════╗ 2 | ║ Agent CLI Services ║ 3 | ╠═══════════════════════════════════════════════════════════════════╣ 4 | ║ ║ 5 | ║ 🔴 IMPORTANT: ║ 6 | ║ • Ctrl-O d → Detach (keeps services running in background!) ║ 7 | ║ • Ctrl-Q → Quit (STOPS all services!) ║ 8 | ║ ║ 9 | ║ To reattach later: $ zellij attach agent-cli ║ 10 | ║ ║ 11 | ╠═══════════════════════════════════════════════════════════════════╣ 12 | ║ ║ 13 | ║ Services Running: ║ 14 | ║ • Ollama (LLM) - Port 11434 ║ 15 | ║ • Whisper (STT) - Port 10300 ║ 16 | ║ • Piper (TTS) - Port 10200 ║ 17 | ║ • OpenWakeWord - Port 10400 ║ 18 | ║ ║ 19 | ║ Navigation: ║ 20 | ║ • Alt + ← → ↑ ↓ - Move between panes ║ 21 | ║ • Ctrl-F - Toggle this help ║ 22 | ║ • q - Close this help ║ 23 | ║ ║ 24 | ╚═══════════════════════════════════════════════════════════════════╝ 25 | -------------------------------------------------------------------------------- /tests/memory/test_api_health.py: -------------------------------------------------------------------------------- 1 | """Smoke tests for memory API health and lifecycle.""" 2 | 3 | from __future__ import annotations 4 | 5 | from contextlib import ExitStack 6 | from typing import Any 7 | from unittest.mock import patch 8 | 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 12 | from agent_cli.memory import api as memory_api 13 | 14 | 15 | class _FakeCollection: 16 | pass 17 | 18 | 19 | def test_memory_health_and_startup_shutdown(tmp_path: Any) -> None: 20 | started: list[str] = [] 21 | 22 | async def _noop_watch(*_args: Any, **_kwargs: 
Any) -> None: 23 | started.append("watch") 24 | 25 | with ExitStack() as stack: 26 | stack.enter_context( 27 | patch("agent_cli.memory.client.watch_memory_store", side_effect=_noop_watch), 28 | ) 29 | stack.enter_context( 30 | patch("agent_cli.memory.client.init_memory_collection", return_value=_FakeCollection()), 31 | ) 32 | stack.enter_context( 33 | patch("agent_cli.memory.client.get_reranker_model", return_value=None), 34 | ) 35 | 36 | app = memory_api.create_app( 37 | memory_path=tmp_path, 38 | openai_base_url="http://mock-llm", 39 | embedding_model=DEFAULT_OPENAI_EMBEDDING_MODEL, 40 | enable_summarization=False, 41 | ) 42 | with TestClient(app) as client: 43 | resp = client.get("/health") 44 | assert resp.status_code == 200 45 | body = resp.json() 46 | assert body["status"] == "ok" 47 | assert body["memory_store"] == str(tmp_path.resolve()) 48 | 49 | # startup/shutdown should have triggered watch task creation 50 | assert started 51 | -------------------------------------------------------------------------------- /tests/rag/test_retriever.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG retriever.""" 2 | 3 | from unittest.mock import MagicMock, patch 4 | 5 | from agent_cli.core import reranker 6 | from agent_cli.rag import _retriever 7 | 8 | 9 | def test_get_reranker_model_installed() -> None: 10 | """Test loading reranker when installed.""" 11 | with patch("agent_cli.core.reranker.OnnxCrossEncoder") as mock_ce: 12 | reranker.get_reranker_model() 13 | mock_ce.assert_called_once() 14 | 15 | 16 | def test_search_context() -> None: 17 | """Test searching context.""" 18 | mock_collection = MagicMock() 19 | mock_reranker = MagicMock() 20 | 21 | # Mock query results 22 | mock_collection.query.return_value = { 23 | "documents": [["doc1", "doc2"]], 24 | "metadatas": [ 25 | [ 26 | {"source": "s1", "file_path": "p1", "chunk_id": 0}, 27 | {"source": "s2", "file_path": "p2", "chunk_id": 1}, 28 | ], 29 | ], 30 | } 
31 | 32 | # Mock reranker scores 33 | mock_reranker.predict.return_value = [-1.0, 5.0] 34 | 35 | result = _retriever.search_context(mock_collection, mock_reranker, "query", top_k=1) 36 | 37 | # Should return doc2 because it has higher score 38 | assert "doc2" in result.context 39 | assert "doc1" not in result.context 40 | assert len(result.sources) == 1 41 | assert result.sources[0].path == "p2" 42 | 43 | 44 | def test_search_context_empty() -> None: 45 | """Test search with no results.""" 46 | mock_collection = MagicMock() 47 | mock_reranker = MagicMock() 48 | 49 | mock_collection.query.return_value = {"documents": []} 50 | 51 | result = _retriever.search_context(mock_collection, mock_reranker, "query") 52 | 53 | assert result.context == "" 54 | assert result.sources == [] 55 | -------------------------------------------------------------------------------- /agent_cli/install/hotkeys.py: -------------------------------------------------------------------------------- 1 | """Hotkey installation commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | import platform 6 | 7 | from agent_cli.cli import app 8 | from agent_cli.core.utils import print_with_style 9 | from agent_cli.install.common import execute_installation_script, get_platform_script 10 | 11 | 12 | @app.command("install-hotkeys", rich_help_panel="Installation") 13 | def install_hotkeys() -> None: 14 | """Install system-wide hotkeys for agent-cli commands. 15 | 16 | Sets up the following hotkeys: 17 | 18 | macOS: 19 | - Cmd+Shift+R: Toggle voice transcription 20 | - Cmd+Shift+A: Autocorrect clipboard text 21 | - Cmd+Shift+V: Voice edit clipboard text 22 | 23 | Linux: 24 | - Super+Shift+R: Toggle voice transcription 25 | - Super+Shift+A: Autocorrect clipboard text 26 | - Super+Shift+V: Voice edit clipboard text 27 | 28 | Note: On macOS, you may need to grant Accessibility permissions to skhd 29 | in System Settings → Privacy & Security → Accessibility. 
30 | """ 31 | script_name = get_platform_script("setup-macos-hotkeys.sh", "setup-linux-hotkeys.sh") 32 | system = platform.system().lower() 33 | 34 | execute_installation_script( 35 | script_name=script_name, 36 | operation_name="Set up hotkeys", 37 | success_message="Hotkeys installed successfully!", 38 | ) 39 | 40 | # Post-installation steps for macOS 41 | if system == "darwin": 42 | print_with_style("\n⚠️ Important:", "yellow") 43 | print_with_style("If hotkeys don't work, grant Accessibility permissions:", "yellow") 44 | print_with_style( 45 | " 1. Open System Settings → Privacy & Security → Accessibility", 46 | "cyan", 47 | ) 48 | print_with_style(" 2. Add and enable 'skhd'", "cyan") 49 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Tests for the CLI.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import patch 7 | 8 | from typer.testing import CliRunner 9 | 10 | from agent_cli.cli import app 11 | 12 | if TYPE_CHECKING: 13 | import pytest 14 | 15 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 16 | 17 | 18 | def test_main_no_args() -> None: 19 | """Test the main function with no arguments.""" 20 | result = runner.invoke(app) 21 | assert "No command specified" in result.stdout 22 | assert "Usage" in result.stdout 23 | 24 | 25 | @patch("agent_cli.core.utils.setup_logging") 26 | def test_main_with_args(mock_setup_logging: pytest.MagicMock) -> None: 27 | """Test the main function with arguments.""" 28 | result = runner.invoke(app, ["--help"]) 29 | assert result.exit_code == 0 30 | assert "Usage" in result.stdout 31 | mock_setup_logging.assert_not_called() 32 | 33 | 34 | @patch("agent_cli.agents.server.run_server") 35 | def test_server_command(mock_run_server: pytest.MagicMock) -> None: 36 | """Test the server command.""" 37 | result = 
runner.invoke(app, ["server"]) 38 | assert result.exit_code == 0 39 | assert "Starting Agent CLI transcription server" in result.stdout 40 | mock_run_server.assert_called_once_with(host="0.0.0.0", port=61337, reload=False) # noqa: S104 41 | 42 | 43 | @patch("agent_cli.agents.server.run_server") 44 | def test_server_command_with_options(mock_run_server: pytest.MagicMock) -> None: 45 | """Test the server command with custom options.""" 46 | result = runner.invoke(app, ["server", "--host", "127.0.0.1", "--port", "8080", "--reload"]) 47 | assert result.exit_code == 0 48 | assert "Starting Agent CLI transcription server on 127.0.0.1:8080" in result.stdout 49 | assert "Auto-reload enabled for development" in result.stdout 50 | mock_run_server.assert_called_once_with(host="127.0.0.1", port=8080, reload=True) 51 | -------------------------------------------------------------------------------- /agent_cli/agents/server.py: -------------------------------------------------------------------------------- 1 | """FastAPI server command for Agent CLI.""" 2 | 3 | from __future__ import annotations 4 | 5 | from importlib.util import find_spec 6 | 7 | import typer 8 | 9 | from agent_cli import opts 10 | from agent_cli.cli import app 11 | from agent_cli.core.utils import ( 12 | console, 13 | print_command_line_args, 14 | print_error_message, 15 | ) 16 | 17 | has_uvicorn = find_spec("uvicorn") is not None 18 | has_fastapi = find_spec("fastapi") is not None 19 | 20 | 21 | def run_server( 22 | host: str = "0.0.0.0", # noqa: S104 23 | port: int = 61337, 24 | reload: bool = False, 25 | ) -> None: 26 | """Run the FastAPI server.""" 27 | import uvicorn # noqa: PLC0415 28 | 29 | uvicorn.run( 30 | "agent_cli.api:app", 31 | host=host, 32 | port=port, 33 | reload=reload, 34 | log_level="info", 35 | ) 36 | 37 | 38 | @app.command("server") 39 | def server( 40 | host: str = typer.Option("0.0.0.0", help="Host to bind the server to"), # noqa: S104 41 | port: int = typer.Option(61337, help="Port to 
bind the server to"), 42 | reload: bool = typer.Option(False, "--reload", help="Enable auto-reload for development"), # noqa: FBT003 43 | config_file: str | None = opts.CONFIG_FILE, 44 | print_args: bool = opts.PRINT_ARGS, 45 | ) -> None: 46 | """Run the FastAPI transcription web server.""" 47 | if print_args: 48 | print_command_line_args(locals()) 49 | if not has_uvicorn or not has_fastapi: 50 | msg = "uvicorn or fastapi is not installed, please install it with `pip install fastapi[standard]` or `pip install agent-cli[server]`" 51 | print_error_message(msg) 52 | raise typer.Exit(1) 53 | console.print( 54 | f"[bold green]Starting Agent CLI transcription server on {host}:{port}[/bold green]", 55 | ) 56 | if reload: 57 | console.print("[yellow]Auto-reload enabled for development[/yellow]") 58 | run_server(host=host, port=port, reload=reload) 59 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-voice-edit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli voice-edit on Linux 4 | # 5 | # This script provides voice editing for clipboard text: 6 | # - First invocation: Starts voice editing in the background 7 | # - Second invocation: Stops voice editing and displays the result 8 | # 9 | # Works across different Linux desktop environments 10 | 11 | # Function to send notification 12 | notify() { 13 | local title="$1" 14 | local message="$2" 15 | local timeout="${3:-3000}" 16 | 17 | if command -v notify-send &> /dev/null; then 18 | notify-send -t "$timeout" "$title" "$message" 19 | elif command -v dunstify &> /dev/null; then 20 | dunstify -t "$timeout" "$title" "$message" 21 | else 22 | echo "$title: $message" 23 | fi 24 | } 25 | 26 | # Function to sync clipboard (Wayland) 27 | sync_clipboard() { 28 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 29 | wl-paste | wl-copy -p 2>/dev/null 
|| true 30 | fi 31 | } 32 | 33 | # Check if agent-cli voice-edit is already running 34 | if pgrep -f "agent-cli voice-edit" > /dev/null; then 35 | # Voice edit is running - stop it 36 | pkill -INT -f "agent-cli voice-edit" 37 | notify "🛑 Voice Edit Stopped" "Processing voice command..." 38 | else 39 | # Voice edit is not running - start it 40 | 41 | # Ensure agent-cli is in PATH 42 | export PATH="$PATH:$HOME/.local/bin" 43 | 44 | # Notify user that recording has started 45 | notify "🎙️ Voice Edit Started" "Listening for voice command..." 46 | 47 | # Start voice edit in background 48 | ( 49 | OUTPUT=$(agent-cli voice-edit --quiet 2>/dev/null) 50 | if [ -n "$OUTPUT" ]; then 51 | # Sync clipboard to primary selection (Wayland) 52 | sync_clipboard 53 | notify "✨ Voice Edit Result" "$OUTPUT" 5000 54 | else 55 | notify "❌ Error" "No output" 3000 56 | fi 57 | ) & 58 | fi 59 | -------------------------------------------------------------------------------- /scripts/setup-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "🚀 Setting up agent-cli services on macOS..." 6 | 7 | # Check if Homebrew is installed 8 | if ! command -v brew &> /dev/null; then 9 | echo "❌ Homebrew is not installed. Please install Homebrew first:" 10 | echo "/bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"" 11 | exit 1 12 | fi 13 | 14 | # Check if uv is installed 15 | if ! command -v uv &> /dev/null; then 16 | echo "📦 Installing uv..." 17 | brew install uv 18 | fi 19 | 20 | # Install Ollama 21 | echo "🧠 Checking Ollama..." 22 | if ! command -v ollama &> /dev/null; then 23 | echo "🍺 Installing Ollama via Homebrew..." 24 | brew install ollama 25 | echo "✅ Ollama installed successfully" 26 | else 27 | echo "✅ Ollama is already installed" 28 | fi 29 | 30 | # Check if zellij is installed 31 | if ! 
command -v zellij &> /dev/null; then 32 | echo "📺 Installing zellij..." 33 | brew install zellij 34 | fi 35 | 36 | # Install agent-cli 37 | echo "🤖 Installing/upgrading agent-cli..." 38 | uv tool install --upgrade agent-cli 39 | 40 | # Preload default Ollama model 41 | echo "⬇️ Preloading default Ollama model (gemma3:4b)..." 42 | echo "⏳ This may take a few minutes depending on your internet connection..." 43 | # Start Ollama in background, then pull model synchronously 44 | (ollama serve >/dev/null 2>&1 &) && sleep 2 && ollama pull gemma3:4b 45 | # Stop the temporary ollama server 46 | pkill -f "ollama serve" || true 47 | 48 | echo "" 49 | echo "✅ Setup complete! You can now run the services:" 50 | echo "" 51 | echo "Option 1 - Run all services at once:" 52 | echo " ./start-all-services.sh" 53 | echo "" 54 | echo "Option 2 - Run services individually:" 55 | echo " 1. Ollama: ollama serve" 56 | echo " 2. Whisper: ./run-whisper.sh" 57 | echo " 3. Piper: ./run-piper.sh" 58 | echo " 4. OpenWakeWord: ./run-openwakeword.sh" 59 | echo "" 60 | echo "🎉 agent-cli has been installed and is ready to use!" 
61 | -------------------------------------------------------------------------------- /tests/memory/test_files.py: -------------------------------------------------------------------------------- 1 | """Tests for file-backed memory helpers.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from agent_cli.memory import _files as mem_files 8 | from agent_cli.memory.models import MemoryMetadata 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | 14 | def test_write_and_read_memory_file_round_trip(tmp_path: Path) -> None: 15 | """Writes a memory file and reads it back with metadata intact.""" 16 | record = mem_files.write_memory_file( 17 | tmp_path, 18 | conversation_id="conv-1", 19 | role="memory", 20 | created_at="2025-01-01T00:00:00Z", 21 | content="fact about bikes", 22 | ) 23 | 24 | loaded = mem_files.read_memory_file(record.path) 25 | assert loaded is not None 26 | assert loaded.content == "fact about bikes" 27 | assert loaded.metadata.conversation_id == "conv-1" 28 | assert "facts" in loaded.path.parts 29 | 30 | 31 | def test_snapshot_round_trip(tmp_path: Path) -> None: 32 | """Snapshot JSON stores and restores memory records.""" 33 | meta = MemoryMetadata( 34 | conversation_id="c1", 35 | role="memory", 36 | created_at="now", 37 | ) 38 | rec = mem_files.MemoryFileRecord(id="1", path=tmp_path / "p.md", metadata=meta, content="hi") 39 | snapshot = tmp_path / "snap.json" 40 | 41 | mem_files.write_snapshot(snapshot, [rec]) 42 | loaded = mem_files.load_snapshot(snapshot) 43 | 44 | assert "1" in loaded 45 | assert loaded["1"].content == "hi" 46 | 47 | 48 | def test_load_memory_files_skips_invalid(tmp_path: Path) -> None: 49 | """Invalid files without front matter should be ignored.""" 50 | entries_dir = tmp_path / "entries" / "default" 51 | entries_dir.mkdir(parents=True, exist_ok=True) 52 | bad_file = entries_dir / "bad.md" 53 | bad_file.write_text("no front matter here", encoding="utf-8") 54 | 55 | records 
= mem_files.load_memory_files(tmp_path) 56 | assert records == [] 57 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/toggle-transcription.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Toggle script for agent-cli transcription on Linux 4 | # 5 | # This script provides a simple toggle mechanism for voice transcription: 6 | # - First invocation: Starts transcription in the background 7 | # - Second invocation: Stops transcription and displays the result 8 | # 9 | # Works across different Linux desktop environments 10 | 11 | # Function to send notification 12 | notify() { 13 | local title="$1" 14 | local message="$2" 15 | local timeout="${3:-3000}" 16 | 17 | if command -v notify-send &> /dev/null; then 18 | notify-send -t "$timeout" "$title" "$message" 19 | elif command -v dunstify &> /dev/null; then 20 | dunstify -t "$timeout" "$title" "$message" 21 | else 22 | echo "$title: $message" 23 | fi 24 | } 25 | 26 | # Function to sync clipboard (Wayland) 27 | sync_clipboard() { 28 | if command -v wl-paste &> /dev/null && command -v wl-copy &> /dev/null; then 29 | wl-paste | wl-copy -p 2>/dev/null || true 30 | fi 31 | } 32 | 33 | # Check if agent-cli transcribe is already running 34 | if pgrep -f "agent-cli transcribe" > /dev/null; then 35 | # Transcription is running - stop it 36 | pkill -INT -f "agent-cli transcribe" 37 | notify "🛑 Transcription Stopped" "Processing results..." 38 | else 39 | # Transcription is not running - start it 40 | 41 | # Ensure agent-cli is in PATH 42 | export PATH="$PATH:$HOME/.local/bin" 43 | 44 | # Notify user that recording has started 45 | notify "🎙️ Transcription Started" "Listening in background..." 
46 | 47 | # Start transcription in background 48 | ( 49 | OUTPUT=$(agent-cli transcribe --llm --quiet 2>/dev/null) 50 | if [ -n "$OUTPUT" ]; then 51 | # Sync clipboard to primary selection (Wayland) 52 | sync_clipboard 53 | notify "📄 Transcription Result" "$OUTPUT" 5000 54 | else 55 | notify "❌ Error" "No output" 3000 56 | fi 57 | ) & 58 | fi 59 | -------------------------------------------------------------------------------- /scripts/setup-macos-hotkeys.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "⌨️ Setting up macOS hotkeys..." 6 | 7 | # Check macOS 8 | if [[ "$(uname)" != "Darwin" ]]; then 9 | echo "❌ This script is for macOS only" 10 | exit 1 11 | fi 12 | 13 | # Install dependencies 14 | echo "📦 Installing dependencies..." 15 | if ! command -v brew &> /dev/null; then 16 | echo "🍺 Installing Homebrew..." 17 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" 18 | fi 19 | 20 | brew install terminal-notifier 21 | brew tap jackielii/tap && brew install jackielii/tap/skhd-zig 22 | 23 | # Setup configuration 24 | echo "⚙️ Setting up configuration..." 25 | mkdir -p ~/.config/skhd 26 | 27 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 28 | TRANSCRIBE_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-transcription.sh" 29 | AUTOCORRECT_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-autocorrect.sh" 30 | VOICE_EDIT_SCRIPT="$SCRIPT_DIR/macos-hotkeys/toggle-voice-edit.sh" 31 | 32 | cat > ~/.config/skhd/skhdrc << EOF 33 | # Agent-CLI Hotkeys 34 | cmd + shift - r : "$TRANSCRIBE_SCRIPT" 35 | cmd + shift - a : "$AUTOCORRECT_SCRIPT" 36 | cmd + shift - v : "$VOICE_EDIT_SCRIPT" 37 | EOF 38 | 39 | # Start service 40 | echo "🚀 Starting skhd..." 41 | skhd --start-service 42 | 43 | # Test 44 | echo "🧪 Testing..." 45 | terminal-notifier -title "⌨️ Setup Complete" -message "Agent-CLI hotkeys ready!" 46 | 47 | echo "" 48 | echo "✅ Done! 
Hotkeys:" 49 | echo " Cmd+Shift+R - Transcribe voice" 50 | echo " Cmd+Shift+A - Autocorrect clipboard" 51 | echo " Cmd+Shift+V - Voice edit clipboard" 52 | echo "" 53 | echo "If the hotkey doesn't work:" 54 | echo "1. Open System Settings → Privacy & Security → Accessibility" 55 | echo "2. Add and enable 'skhd'" 56 | echo "" 57 | echo "If the notification doesn't show:" 58 | echo "1. Open System Settings → Notifications" 59 | echo "2. Find 'terminal-notifier' and allow notifications" 60 | echo "3. Set Alert style to Persistent for better visibility" 61 | echo "4. Enable 'Allow notification when mirroring or sharing the display'" 62 | -------------------------------------------------------------------------------- /agent_cli/services/_wyoming_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for Wyoming protocol interactions to eliminate code duplication.""" 2 | 3 | from __future__ import annotations 4 | 5 | from contextlib import asynccontextmanager 6 | from typing import TYPE_CHECKING 7 | 8 | from wyoming.client import AsyncClient 9 | 10 | from agent_cli.core.utils import print_error_message 11 | 12 | if TYPE_CHECKING: 13 | import logging 14 | from collections.abc import AsyncGenerator 15 | 16 | 17 | @asynccontextmanager 18 | async def wyoming_client_context( 19 | server_ip: str, 20 | server_port: int, 21 | server_type: str, 22 | logger: logging.Logger, 23 | *, 24 | quiet: bool = False, 25 | ) -> AsyncGenerator[AsyncClient, None]: 26 | """Context manager for Wyoming client connections with unified error handling. 
27 | 28 | Args: 29 | server_ip: Wyoming server IP 30 | server_port: Wyoming server port 31 | server_type: Type of server (e.g., "ASR", "TTS", "wake word") 32 | logger: Logger instance 33 | quiet: If True, suppress console error messages 34 | 35 | Yields: 36 | Connected Wyoming client 37 | 38 | Raises: 39 | ConnectionRefusedError: If connection fails 40 | Exception: For other connection errors 41 | 42 | """ 43 | uri = f"tcp://{server_ip}:{server_port}" 44 | logger.info("Connecting to Wyoming %s server at %s", server_type, uri) 45 | 46 | try: 47 | async with AsyncClient.from_uri(uri) as client: 48 | logger.info("%s connection established", server_type) 49 | yield client 50 | except ConnectionRefusedError: 51 | logger.exception("%s connection refused.", server_type) 52 | if not quiet: 53 | print_error_message( 54 | f"{server_type} connection refused.", 55 | f"Is the Wyoming {server_type.lower()} server running at {uri}?", 56 | ) 57 | raise 58 | except Exception as e: 59 | logger.exception("An error occurred during %s connection", server_type.lower()) 60 | if not quiet: 61 | print_error_message(f"{server_type} error: {e}") 62 | raise 63 | -------------------------------------------------------------------------------- /agent_cli/core/watch.py: -------------------------------------------------------------------------------- 1 | """Shared watchfiles helper.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | from collections.abc import Callable 7 | from pathlib import Path 8 | 9 | from watchfiles import Change, awatch 10 | 11 | ChangeHandler = Callable[[Change, Path], None] 12 | PathFilter = Callable[[Path, Path], bool] 13 | 14 | 15 | def _default_skip_hidden(path: Path, root: Path) -> bool: 16 | """Default filter that skips hidden files and directories.""" 17 | rel_parts = path.relative_to(root).parts 18 | return any(part.startswith(".") for part in rel_parts) 19 | 20 | 21 | async def watch_directory( 22 | root: Path, 23 | handler: ChangeHandler, 24 
| *, 25 | skip_hidden: bool = True, 26 | ignore_filter: PathFilter | None = None, 27 | use_executor: bool = True, 28 | ) -> None: 29 | """Watch a directory for file changes and invoke handler(change, path). 30 | 31 | Args: 32 | root: The directory to watch. 33 | handler: Callback invoked with (change_type, path) for each file change. 34 | skip_hidden: If True, skip files/dirs starting with '.'. Ignored if 35 | ignore_filter is provided. 36 | ignore_filter: Optional custom filter function(path, root) -> bool. 37 | Returns True if the path should be ignored. Overrides skip_hidden. 38 | use_executor: If True, run handler in a thread pool executor. 39 | 40 | """ 41 | loop = asyncio.get_running_loop() 42 | 43 | # Determine which filter to use 44 | if ignore_filter is not None: 45 | should_skip = ignore_filter 46 | elif skip_hidden: 47 | should_skip = _default_skip_hidden 48 | else: 49 | should_skip = None 50 | 51 | async for changes in awatch(root): 52 | for change_type, file_path_str in changes: 53 | path = Path(file_path_str) 54 | if path.is_dir(): 55 | continue 56 | 57 | if should_skip is not None and should_skip(path, root): 58 | continue 59 | 60 | if use_executor: 61 | await loop.run_in_executor(None, handler, change_type, path) 62 | else: 63 | handler(change_type, path) 64 | -------------------------------------------------------------------------------- /tests/agents/test_tts_common_extra.py: -------------------------------------------------------------------------------- 1 | """Extra tests for the TTS common module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from unittest.mock import AsyncMock, MagicMock, patch 7 | 8 | import pytest 9 | 10 | from agent_cli import config 11 | from agent_cli.services.tts import _save_audio_file, handle_tts_playback 12 | 13 | 14 | @pytest.mark.asyncio 15 | @patch("agent_cli.services.tts.asyncio.to_thread") 16 | async def test_save_audio_file_os_error(mock_to_thread: AsyncMock) -> None: 17 | 
"""Test _save_audio_file with OSError.""" 18 | mock_to_thread.side_effect = OSError("Permission denied") 19 | 20 | await _save_audio_file( 21 | b"audio data", 22 | Path("test.wav"), 23 | quiet=False, 24 | logger=MagicMock(), 25 | ) 26 | 27 | mock_to_thread.assert_called_once() 28 | 29 | 30 | @pytest.mark.asyncio 31 | @patch("agent_cli.services.tts._speak_text", new_callable=AsyncMock) 32 | async def test_handle_tts_playback_os_error(mock_speak_text: AsyncMock) -> None: 33 | """Test handle_tts_playback with OSError.""" 34 | mock_speak_text.side_effect = OSError("Connection error") 35 | mock_live = MagicMock() 36 | 37 | provider_cfg = config.ProviderSelection( 38 | tts_provider="wyoming", 39 | asr_provider="wyoming", 40 | llm_provider="ollama", 41 | ) 42 | audio_out_cfg = config.AudioOutput(enable_tts=True) 43 | wyoming_tts_cfg = config.WyomingTTS(tts_wyoming_ip="localhost", tts_wyoming_port=1234) 44 | openai_tts_cfg = config.OpenAITTS(tts_openai_model="tts-1", tts_openai_voice="alloy") 45 | kokoro_tts_cfg = config.KokoroTTS( 46 | tts_kokoro_model="tts-1", 47 | tts_kokoro_voice="alloy", 48 | tts_kokoro_host="http://localhost:8000/v1", 49 | ) 50 | 51 | result = await handle_tts_playback( 52 | text="hello", 53 | provider_cfg=provider_cfg, 54 | audio_output_cfg=audio_out_cfg, 55 | wyoming_tts_cfg=wyoming_tts_cfg, 56 | openai_tts_cfg=openai_tts_cfg, 57 | kokoro_tts_cfg=kokoro_tts_cfg, 58 | save_file=None, 59 | quiet=False, 60 | logger=MagicMock(), 61 | live=mock_live, 62 | ) 63 | 64 | assert result is None 65 | -------------------------------------------------------------------------------- /agent_cli/rag/_indexer.py: -------------------------------------------------------------------------------- 1 | """File watcher and indexing logic using watchfiles.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING 7 | 8 | from watchfiles import Change 9 | 10 | from agent_cli.core.watch import watch_directory 11 | from 
agent_cli.rag._indexing import index_file, remove_file 12 | from agent_cli.rag._utils import should_ignore_path 13 | 14 | if TYPE_CHECKING: 15 | from pathlib import Path 16 | 17 | from chromadb import Collection 18 | 19 | LOGGER = logging.getLogger(__name__) 20 | 21 | 22 | async def watch_docs( 23 | collection: Collection, 24 | docs_folder: Path, 25 | file_hashes: dict[str, str], 26 | file_mtimes: dict[str, float], 27 | ) -> None: 28 | """Watch docs folder for changes and update index asynchronously.""" 29 | LOGGER.info("📁 Watching folder: %s", docs_folder) 30 | 31 | await watch_directory( 32 | docs_folder, 33 | lambda change, path: _handle_change( 34 | change, 35 | path, 36 | collection, 37 | docs_folder, 38 | file_hashes, 39 | file_mtimes, 40 | ), 41 | ignore_filter=should_ignore_path, 42 | ) 43 | 44 | 45 | def _handle_change( 46 | change: Change, 47 | file_path: Path, 48 | collection: Collection, 49 | docs_folder: Path, 50 | file_hashes: dict[str, str], 51 | file_mtimes: dict[str, float], 52 | ) -> None: 53 | try: 54 | if change == Change.deleted: 55 | LOGGER.info("[deleted] Removing from index: %s", file_path.name) 56 | remove_file(collection, docs_folder, file_path, file_hashes, file_mtimes) 57 | return 58 | if change in {Change.added, Change.modified} and file_path.is_file(): 59 | action = "created" if change == Change.added else "modified" 60 | LOGGER.info("[%s] Indexing: %s", action, file_path.name) 61 | index_file(collection, docs_folder, file_path, file_hashes, file_mtimes) 62 | except (OSError, UnicodeDecodeError): 63 | LOGGER.warning("Watcher handler transient IO error for %s", file_path, exc_info=True) 64 | except Exception: 65 | LOGGER.exception("Watcher handler failed for %s", file_path) 66 | raise 67 | -------------------------------------------------------------------------------- /scripts/linux-hotkeys/README.md: -------------------------------------------------------------------------------- 1 | # Linux Hotkeys 2 | 3 | System-wide hotkeys for 
agent-cli voice AI features on Linux. 4 | 5 | ## Setup 6 | 7 | ```bash 8 | ./setup-linux-hotkeys.sh 9 | ``` 10 | 11 | The setup script will: 12 | 1. Install notification support if missing 13 | 2. Show you the exact hotkey bindings to add to your desktop environment 14 | 3. Provide copy-paste ready configuration for popular desktop environments 15 | 16 | ## Usage 17 | 18 | - **`Super+Shift+R`** → Toggle voice transcription (start/stop with result) 19 | - **`Super+Shift+A`** → Autocorrect clipboard text 20 | - **`Super+Shift+V`** → Toggle voice edit mode for clipboard 21 | 22 | Results appear in notifications and clipboard. 23 | 24 | ## Desktop Environment Support 25 | 26 | The setup script provides copy-paste ready instructions for: 27 | 28 | - **Hyprland**: Add bindings to `~/.config/hypr/hyprland.conf` 29 | - **Sway**: Add bindings to `~/.config/sway/config` 30 | - **i3**: Add bindings to `~/.config/i3/config` 31 | - **GNOME**: Use Settings → Keyboard → Custom Shortcuts 32 | - **KDE**: Use System Settings → Shortcuts → Custom Shortcuts 33 | - **XFCE**: Use Settings Manager → Keyboard → Application Shortcuts 34 | - **Other**: Manual hotkey configuration in your desktop environment 35 | 36 | ## Features 37 | 38 | - **Manual configuration**: Simple setup with clear instructions for each desktop environment 39 | - **Wayland support**: Includes clipboard syncing for Wayland compositors 40 | - **Fallback notifications**: Uses `notify-send`, `dunstify`, or console output 41 | - **Error handling**: Shows notifications for both success and failure cases 42 | - **PATH handling**: Scripts automatically find agent-cli installation 43 | 44 | ## Troubleshooting 45 | 46 | **Hotkeys not working?** 47 | - Check your desktop's keyboard shortcut settings for conflicts 48 | - Make sure you added the bindings to your desktop environment's config 49 | - Verify the script paths are correct 50 | 51 | **No notifications?** 52 | ```bash 53 | sudo apt install libnotify-bin # Ubuntu/Debian 
54 | sudo dnf install libnotify # Fedora/RHEL 55 | sudo pacman -S libnotify # Arch 56 | ``` 57 | 58 | **Services not running?** 59 | ```bash 60 | ./start-all-services.sh 61 | ``` 62 | 63 | That's it! System-wide hotkeys for agent-cli on Linux. 64 | -------------------------------------------------------------------------------- /agent_cli/memory/_filters.py: -------------------------------------------------------------------------------- 1 | """Filter conversion utilities for ChromaDB.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | 8 | def _convert_condition(key: str, value: Any) -> dict[str, Any] | None: 9 | """Convert a single filter condition to ChromaDB format.""" 10 | if isinstance(value, dict): 11 | # Operator dict: {"gte": 10} → {"$gte": 10} 12 | for op, val in value.items(): 13 | chroma_op = f"${op}" if not op.startswith("$") else op 14 | return {key: {chroma_op: val}} 15 | return None 16 | # Simple equality 17 | return {key: {"$eq": value}} 18 | 19 | 20 | def _process_or(conditions: list[dict[str, Any]]) -> dict[str, Any] | None: 21 | """Process $or conditions.""" 22 | or_conditions = [] 23 | for cond in conditions: 24 | for sub_key, sub_val in cond.items(): 25 | converted = _convert_condition(sub_key, sub_val) 26 | if converted: 27 | or_conditions.append(converted) 28 | if len(or_conditions) > 1: 29 | return {"$or": or_conditions} 30 | if or_conditions: 31 | return or_conditions[0] 32 | return None 33 | 34 | 35 | def to_chroma_where(filters: dict[str, Any] | None) -> dict[str, Any] | None: 36 | """Convert universal filter format to ChromaDB WHERE clause. 
37 | 38 | Supports: 39 | - Simple equality: {"role": "user"} → {"role": {"$eq": "user"}} 40 | - Operators: {"created_at": {"gte": "2024-01-01"}} → {"created_at": {"$gte": "2024-01-01"}} 41 | - Logical OR: {"$or": [{"role": "user"}, {"role": "assistant"}]} 42 | 43 | Operators: eq, ne, gt, gte, lt, lte, in, nin 44 | """ 45 | if not filters: 46 | return None 47 | 48 | processed: list[dict[str, Any]] = [] 49 | for key, value in filters.items(): 50 | if key == "$or": 51 | or_result = _process_or(value) 52 | if or_result: 53 | processed.append(or_result) 54 | elif not key.startswith("$"): 55 | converted = _convert_condition(key, value) 56 | if converted: 57 | processed.append(converted) 58 | 59 | if not processed: 60 | return None 61 | if len(processed) == 1: 62 | return processed[0] 63 | return {"$and": processed} 64 | -------------------------------------------------------------------------------- /tests/test_wyoming_utils.py: -------------------------------------------------------------------------------- 1 | """Tests for the Wyoming utilities.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from unittest.mock import AsyncMock, MagicMock, patch 7 | 8 | import pytest 9 | from wyoming.client import AsyncClient 10 | 11 | from agent_cli.services._wyoming_utils import wyoming_client_context 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_wyoming_client_context_success(): 16 | """Test that the Wyoming client context manager connects successfully.""" 17 | mock_client = AsyncMock(spec=AsyncClient) 18 | with patch( 19 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 20 | return_value=MagicMock( 21 | __aenter__=AsyncMock(return_value=mock_client), 22 | __aexit__=AsyncMock(return_value=None), 23 | ), 24 | ): 25 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()) as client: 26 | assert client is mock_client 27 | 28 | 29 | @pytest.mark.asyncio 30 | async def test_wyoming_client_context_connection_refused( 
31 | caplog: pytest.LogCaptureFixture, 32 | ): 33 | """Test that a ConnectionRefusedError is handled correctly.""" 34 | with ( 35 | patch( 36 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 37 | side_effect=ConnectionRefusedError, 38 | ), 39 | pytest.raises(ConnectionRefusedError), 40 | ): 41 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()): 42 | pass # This part should not be reached 43 | 44 | assert "Test connection refused" in caplog.text 45 | 46 | 47 | @pytest.mark.asyncio 48 | async def test_wyoming_client_context_generic_exception( 49 | caplog: pytest.LogCaptureFixture, 50 | ): 51 | """Test that a generic Exception is handled correctly.""" 52 | with ( 53 | patch( 54 | "agent_cli.services._wyoming_utils.AsyncClient.from_uri", 55 | side_effect=RuntimeError("Something went wrong"), 56 | ), 57 | pytest.raises(RuntimeError), 58 | ): 59 | async with wyoming_client_context("localhost", 1234, "Test", logging.getLogger()): 60 | pass # This part should not be reached 61 | 62 | assert "An error occurred during test connection" in caplog.text 63 | -------------------------------------------------------------------------------- /agent_cli/core/transcription_logger.py: -------------------------------------------------------------------------------- 1 | """Transcription logging utilities for automatic server-side logging.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import logging 7 | from datetime import UTC, datetime 8 | from pathlib import Path 9 | from typing import Any 10 | 11 | 12 | class TranscriptionLogger: 13 | """Handles automatic logging of transcription results with timestamps.""" 14 | 15 | def __init__(self, log_file: Path | str | None = None) -> None: 16 | """Initialize the transcription logger. 17 | 18 | Args: 19 | log_file: Path to the log file. If None, uses default location. 
20 | 21 | """ 22 | if log_file is None: 23 | log_file = Path.cwd() / "transcription_log.json" 24 | elif isinstance(log_file, str): 25 | log_file = Path(log_file) 26 | 27 | self.log_file = log_file 28 | 29 | # Ensure the log directory exists 30 | self.log_file.parent.mkdir(parents=True, exist_ok=True) 31 | 32 | def log_transcription( 33 | self, 34 | *, 35 | raw: str, 36 | processed: str | None = None, 37 | ) -> None: 38 | """Log a transcription result. 39 | 40 | Args: 41 | raw: The raw transcript from ASR. 42 | processed: The processed transcript from LLM. 43 | 44 | """ 45 | log_entry: dict[str, Any] = { 46 | "timestamp": datetime.now(UTC).isoformat(), 47 | "raw": raw, 48 | "processed": processed, 49 | } 50 | 51 | # Write to log file as JSON Lines format 52 | try: 53 | with self.log_file.open("a", encoding="utf-8") as f: 54 | f.write(json.dumps(log_entry, ensure_ascii=False) + "\n") 55 | except OSError: 56 | # Use Python's logging module to log errors with the logger itself 57 | logger = logging.getLogger(__name__) 58 | logger.exception("Failed to write transcription log") 59 | 60 | 61 | # Default logger instance 62 | _default_logger: TranscriptionLogger | None = None 63 | 64 | 65 | def get_default_logger() -> TranscriptionLogger: 66 | """Get the default transcription logger instance.""" 67 | global _default_logger 68 | if _default_logger is None: 69 | _default_logger = TranscriptionLogger() 70 | return _default_logger 71 | -------------------------------------------------------------------------------- /tests/memory/test_indexer.py: -------------------------------------------------------------------------------- 1 | """Indexer and watcher tests for file-based memory.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from watchfiles import Change 8 | 9 | from agent_cli.memory import _files as mem_files 10 | from agent_cli.memory import _indexer 11 | 12 | 13 | class _FakeCollection: 14 | def __init__(self) -> None: 15 | 
self.upserts: list[tuple[list[str], list[str], list[dict[str, Any]]]] = [] 16 | self.deleted: list[list[str]] = [] 17 | 18 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 19 | self.upserts.append((ids, documents, metadatas)) 20 | 21 | def delete(self, ids: list[str]) -> None: 22 | self.deleted.append(ids) 23 | 24 | 25 | def test_initial_index_deletes_stale_and_indexes_current(tmp_path: Any) -> None: 26 | fake = _FakeCollection() 27 | idx = _indexer.MemoryIndex.from_snapshot(tmp_path / "memory_index.json") 28 | idx.entries["stale"] = mem_files.MemoryFileRecord( 29 | id="stale", 30 | path=tmp_path / "entries" / "default" / "stale.md", 31 | metadata=mem_files.MemoryMetadata(conversation_id="c", role="memory", created_at="now"), # type: ignore[attr-defined] 32 | content="old", 33 | ) 34 | 35 | rec = mem_files.write_memory_file( 36 | tmp_path, 37 | conversation_id="c", 38 | role="memory", 39 | created_at="now", 40 | content="fresh", 41 | ) 42 | 43 | _indexer.initial_index(fake, tmp_path, index=idx) 44 | 45 | assert fake.deleted == [["stale"]] 46 | assert fake.upserts # fresh file indexed 47 | assert rec.id in idx.entries 48 | 49 | 50 | def test_handle_change_add_modify_delete(tmp_path: Any) -> None: 51 | fake = _FakeCollection() 52 | idx = _indexer.MemoryIndex(snapshot_path=None) 53 | 54 | rec = mem_files.write_memory_file( 55 | tmp_path, 56 | conversation_id="c", 57 | role="memory", 58 | created_at="now", 59 | content="hello", 60 | ) 61 | 62 | _indexer._handle_change(Change.added, rec.path, fake, idx) 63 | assert fake.upserts 64 | assert rec.id in idx.entries 65 | 66 | _indexer._handle_change(Change.modified, rec.path, fake, idx) 67 | assert len(fake.upserts) >= 2 68 | 69 | _indexer._handle_change(Change.deleted, rec.path, fake, idx) 70 | assert fake.deleted 71 | assert rec.id not in idx.entries 72 | -------------------------------------------------------------------------------- 
/tests/agents/test_wake_word_assistant.py: -------------------------------------------------------------------------------- 1 | """Tests for the wake word assistant agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock, patch 6 | 7 | from typer.testing import CliRunner 8 | 9 | from agent_cli.cli import app 10 | 11 | runner = CliRunner(env={"NO_COLOR": "1", "TERM": "dumb"}) 12 | 13 | 14 | def test_assistant_help(): 15 | """Test the assistant --help command.""" 16 | result = runner.invoke(app, ["assistant", "--help"], env={"NO_COLOR": "1", "TERM": "dumb"}) 17 | assert result.exit_code == 0 18 | assert "Usage: agent-cli assistant [OPTIONS]" in result.stdout 19 | 20 | 21 | @patch("agent_cli.agents.assistant.asyncio.run") 22 | def test_assistant_command(mock_asyncio_run: MagicMock): 23 | """Test the assistant command.""" 24 | result = runner.invoke(app, ["assistant"]) 25 | assert result.exit_code == 0 26 | mock_asyncio_run.assert_called_once() 27 | 28 | 29 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 30 | def test_assistant_stop(mock_stop_or_status_or_toggle: MagicMock): 31 | """Test the assistant --stop command.""" 32 | result = runner.invoke(app, ["assistant", "--stop"]) 33 | assert result.exit_code == 0 34 | mock_stop_or_status_or_toggle.assert_called_once_with( 35 | "assistant", 36 | "wake word assistant", 37 | True, 38 | False, 39 | False, 40 | quiet=False, 41 | ) 42 | 43 | 44 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 45 | def test_assistant_status(mock_stop_or_status_or_toggle: MagicMock): 46 | """Test the assistant --status command.""" 47 | result = runner.invoke(app, ["assistant", "--status"]) 48 | assert result.exit_code == 0 49 | mock_stop_or_status_or_toggle.assert_called_once_with( 50 | "assistant", 51 | "wake word assistant", 52 | False, 53 | True, 54 | False, 55 | quiet=False, 56 | ) 57 | 58 | 59 | @patch("agent_cli.agents.assistant.stop_or_status_or_toggle") 60 | def 
test_assistant_toggle(mock_stop_or_status_or_toggle: MagicMock): 61 | """Test the assistant --toggle command.""" 62 | result = runner.invoke(app, ["assistant", "--toggle"]) 63 | assert result.exit_code == 0 64 | mock_stop_or_status_or_toggle.assert_called_once_with( 65 | "assistant", 66 | "wake word assistant", 67 | False, 68 | False, 69 | True, 70 | quiet=False, 71 | ) 72 | -------------------------------------------------------------------------------- /tests/mocks/audio.py: -------------------------------------------------------------------------------- 1 | """Mock SoundDevice for testing audio functionality without real hardware.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Self 6 | 7 | import numpy as np 8 | 9 | 10 | class MockSoundDeviceStream: 11 | """Mock sounddevice stream for testing.""" 12 | 13 | def __init__(self, *args: Any, **kwargs: Any) -> None: 14 | """Initialize mock audio stream.""" 15 | self.args = args 16 | self.kwargs = kwargs 17 | self.is_input = kwargs.get("input", False) or isinstance(self, MockInputStream) 18 | self.is_output = kwargs.get("output", False) or isinstance(self, MockOutputStream) 19 | self.written_data: list[bytes] = [] 20 | self.active = False 21 | self._closed = False 22 | 23 | def start(self) -> None: 24 | """Start the mock stream.""" 25 | self.active = True 26 | 27 | def stop(self) -> None: 28 | """Stop the mock stream.""" 29 | self.active = False 30 | 31 | def close(self) -> None: 32 | """Close the mock stream.""" 33 | self._closed = True 34 | self.active = False 35 | 36 | def read(self, frames: int) -> tuple[np.ndarray, bool]: 37 | """Simulate reading from audio input device. 
38 | 39 | Returns: 40 | tuple: (data, overflow) 41 | 42 | """ 43 | dtype = self.kwargs.get("dtype", "int16") 44 | channels = self.kwargs.get("channels", 1) 45 | 46 | shape = (frames, channels) if channels > 1 else (frames,) 47 | 48 | if dtype == "int16": 49 | data = np.full(shape, 1, dtype=np.int16) 50 | else: 51 | data = np.zeros(shape, dtype=np.float32) 52 | 53 | return data, False 54 | 55 | def write(self, data: np.ndarray) -> None: 56 | """Simulate writing to audio output device.""" 57 | # data is numpy array 58 | self.written_data.append(data.tobytes()) 59 | 60 | def get_written_data(self) -> bytes: 61 | """Get all written data concatenated.""" 62 | return b"".join(self.written_data) 63 | 64 | def __enter__(self) -> Self: 65 | """Context manager entry.""" 66 | self.start() 67 | return self 68 | 69 | def __exit__(self, *args: object) -> None: 70 | """Context manager exit.""" 71 | self.close() 72 | 73 | 74 | class MockInputStream(MockSoundDeviceStream): 75 | """Mock input stream.""" 76 | 77 | 78 | class MockOutputStream(MockSoundDeviceStream): 79 | """Mock output stream.""" 80 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared test fixtures and configuration.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | import contextlib 7 | import io 8 | import logging 9 | 10 | import pytest 11 | from rich.console import Console 12 | 13 | 14 | def pytest_collection_modifyitems(items: list[pytest.Item]) -> None: 15 | """Set default timeout for all tests.""" 16 | for item in items: 17 | with contextlib.suppress(AttributeError): 18 | item.add_marker(pytest.mark.timeout(3)) 19 | 20 | 21 | @pytest.fixture 22 | def mock_console() -> Console: 23 | """Provide a console that writes to a StringIO for testing.""" 24 | return Console(file=io.StringIO(), width=80, force_terminal=True) 25 | 26 | 27 | @pytest.fixture 28 
@pytest.fixture
def mock_logger() -> logging.Logger:
    """Return a DEBUG-level logger dedicated to tests."""
    test_logger = logging.getLogger("test")
    test_logger.setLevel(logging.DEBUG)
    return test_logger


@pytest.fixture
def stop_event() -> asyncio.Event:
    """Event used to signal cancellation in async tests."""
    return asyncio.Event()


@pytest.fixture
def timeout_seconds() -> float:
    """Upper bound for async operations in tests."""
    return 5.0


@pytest.fixture
def mock_audio_device_info() -> list[dict]:
    """Fake sounddevice device list: one input, one output, one duplex device."""

    def _device(index: int, name: str, inputs: int, outputs: int) -> dict:
        # All fake devices share the standard 44.1 kHz sample rate.
        return {
            "index": index,
            "name": name,
            "max_input_channels": inputs,
            "max_output_channels": outputs,
            "default_samplerate": 44100.0,
        }

    return [
        _device(0, "Mock Input Device", 2, 0),
        _device(1, "Mock Output Device", 0, 2),
        _device(2, "Mock Combined Device", 2, 2),
    ]


@pytest.fixture
def llm_responses() -> dict[str, str]:
    """Canned LLM replies keyed by scenario."""
    return {
        "correct": "This text has been corrected and improved.",
        "hello": "Hello! How can I help you today?",
        "question": "The meaning of life is 42, according to The Hitchhiker's Guide to the Galaxy.",
        "default": "I understand your request and here is my response.",
    }
4 | 5 | ## Quick Start 6 | 7 | ```bash 8 | cd scripts/nvidia-asr-server 9 | uv run server.py 10 | ``` 11 | 12 | Server runs at `http://localhost:9898` 13 | 14 | ## CLI Options 15 | 16 | - `--model`, `-m`: Model to use (default: `canary-qwen-2.5b`) 17 | - `canary-qwen-2.5b`: Multilingual ASR (~5GB VRAM) 18 | - `parakeet-tdt-0.6b-v2`: English with timestamps (~2GB VRAM) 19 | - `--port`, `-p`: Port (default: 9898) 20 | - `--device`, `-d`: Device (default: auto-select best GPU) 21 | 22 | ```bash 23 | # Examples 24 | uv run server.py --model parakeet-tdt-0.6b-v2 25 | uv run server.py -m parakeet-tdt-0.6b-v2 -p 9090 -d cuda:1 26 | ``` 27 | 28 | ## Using with Agent-CLI 29 | 30 | ```bash 31 | # Start server 32 | cd scripts/nvidia-asr-server 33 | uv run server.py 34 | 35 | # In another terminal 36 | agent-cli transcribe \ 37 | --asr-provider openai \ 38 | --asr-openai-base-url http://localhost:9898/v1 39 | ``` 40 | 41 | **Note**: The `/v1` suffix is required for OpenAI compatibility. 42 | 43 | ## API Usage 44 | 45 | ### Python Example 46 | 47 | ```python 48 | import requests 49 | 50 | with open("audio.wav", "rb") as f: 51 | response = requests.post( 52 | "http://localhost:9898/v1/audio/transcriptions", 53 | files={"file": f}, 54 | data={"model": "parakeet-tdt-0.6b-v2"} 55 | ) 56 | 57 | print(response.json()["text"]) 58 | ``` 59 | 60 | ### With Timestamps (Parakeet only) 61 | 62 | ```python 63 | response = requests.post( 64 | "http://localhost:9898/v1/audio/transcriptions", 65 | files={"file": open("audio.wav", "rb")}, 66 | data={ 67 | "model": "parakeet-tdt-0.6b-v2", 68 | "timestamp_granularities": ["word"] 69 | } 70 | ) 71 | 72 | result = response.json() 73 | for word in result.get("words", []): 74 | print(f"{word['start']:.2f}s - {word['end']:.2f}s: {word['word']}") 75 | ``` 76 | 77 | ## Requirements 78 | 79 | - Python 3.13+ 80 | - CUDA-compatible GPU (recommended) 81 | - ~2-5GB VRAM depending on model 82 | 83 | ## Troubleshooting 84 | 85 | **GPU out of memory**: Try 
smaller model or CPU 86 | ```bash 87 | uv run server.py --model parakeet-tdt-0.6b-v2 88 | uv run server.py --device cpu 89 | ``` 90 | 91 | **Port in use**: Change port 92 | ```bash 93 | uv run server.py --port 9999 94 | ``` 95 | 96 | ## License 97 | 98 | - Canary: NVIDIA AI Foundation Models Community License 99 | - Parakeet: CC-BY-4.0 100 | -------------------------------------------------------------------------------- /agent_cli/core/sse.py: -------------------------------------------------------------------------------- 1 | """Shared SSE (Server-Sent Events) formatting helpers for OpenAI-compatible streaming.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import time 7 | from typing import Any 8 | 9 | 10 | def format_chunk( 11 | run_id: str, 12 | model: str, 13 | *, 14 | content: str | None = None, 15 | finish_reason: str | None = None, 16 | extra: dict[str, Any] | None = None, 17 | ) -> str: 18 | """Format a single SSE chunk in OpenAI chat.completion.chunk format. 19 | 20 | Args: 21 | run_id: Unique identifier for this completion. 22 | model: Model name to include in response. 23 | content: Text content delta (None for finish chunk). 24 | finish_reason: Reason for completion (e.g., "stop"). 25 | extra: Additional fields to include in the response. 26 | 27 | Returns: 28 | Formatted SSE data line. 29 | 30 | """ 31 | data: dict[str, Any] = { 32 | "id": f"chatcmpl-{run_id}", 33 | "object": "chat.completion.chunk", 34 | "created": int(time.time()), 35 | "model": model, 36 | "choices": [ 37 | { 38 | "index": 0, 39 | "delta": {"content": content} if content else {}, 40 | "finish_reason": finish_reason, 41 | }, 42 | ], 43 | } 44 | if extra: 45 | data.update(extra) 46 | return f"data: {json.dumps(data)}\n\n" 47 | 48 | 49 | def format_done() -> str: 50 | """Format the terminal [DONE] SSE message.""" 51 | return "data: [DONE]\n\n" 52 | 53 | 54 | def parse_chunk(line: str) -> dict[str, Any] | None: 55 | """Parse an SSE data line into a dict. 
56 | 57 | Args: 58 | line: Raw SSE line (e.g., "data: {...}"). 59 | 60 | Returns: 61 | Parsed JSON dict, or None if not parseable or [DONE]. 62 | 63 | """ 64 | if not line.startswith("data:"): 65 | return None 66 | payload = line[5:].strip() 67 | if payload == "[DONE]": 68 | return None 69 | try: 70 | return json.loads(payload) 71 | except json.JSONDecodeError: 72 | return None 73 | 74 | 75 | def extract_content_from_chunk(chunk: dict[str, Any]) -> str: 76 | """Extract text content from a parsed SSE chunk. 77 | 78 | Args: 79 | chunk: Parsed chunk dict from parse_chunk(). 80 | 81 | Returns: 82 | Content string, or empty string if not found. 83 | 84 | """ 85 | choices = chunk.get("choices") or [{}] 86 | delta = choices[0].get("delta") or {} 87 | return delta.get("content") or delta.get("text") or "" 88 | -------------------------------------------------------------------------------- /tests/rag/test_history.py: -------------------------------------------------------------------------------- 1 | """Test history preservation in RAG engine.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import AsyncMock, MagicMock, patch 5 | 6 | import pytest 7 | from pydantic_ai.messages import ModelRequest, ModelResponse 8 | 9 | from agent_cli.rag import engine 10 | from agent_cli.rag.models import ChatRequest, Message 11 | 12 | 13 | @pytest.mark.asyncio 14 | async def test_process_chat_request_preserves_history(tmp_path: Path) -> None: 15 | """Test that conversation history is correctly passed to the agent.""" 16 | mock_collection = MagicMock() 17 | mock_reranker = MagicMock() 18 | 19 | # Mock Agent Run 20 | mock_run_result = MagicMock() 21 | mock_run_result.output = "Response" 22 | mock_run_result.run_id = "test-id" 23 | mock_run_result.usage.return_value = None 24 | 25 | with ( 26 | patch("pydantic_ai.Agent.run", new_callable=AsyncMock) as mock_run, 27 | patch("agent_cli.rag.engine.search_context") as mock_search, 28 | ): 29 | mock_run.return_value = mock_run_result 30 
@pytest.mark.asyncio
async def test_process_chat_request_preserves_history(tmp_path: Path) -> None:
    """Conversation history must be forwarded to the agent unchanged."""
    mock_collection = MagicMock()
    mock_reranker = MagicMock()

    # Canned agent run result.
    mock_run_result = MagicMock()
    mock_run_result.output = "Response"
    mock_run_result.run_id = "test-id"
    mock_run_result.usage.return_value = None

    with (
        patch("pydantic_ai.Agent.run", new_callable=AsyncMock) as mock_run,
        patch("agent_cli.rag.engine.search_context") as mock_search,
    ):
        mock_run.return_value = mock_run_result
        mock_search.return_value = MagicMock(context="")  # no RAG context for this test

        # A multi-turn conversation ending with a fresh user question.
        conversation = [
            Message(role="system", content="System prompt"),
            Message(role="user", content="Question 1"),
            Message(role="assistant", content="Answer 1"),
            Message(role="user", content="Question 2"),
        ]
        request = ChatRequest(model="test", messages=conversation)

        await engine.process_chat_request(
            request,
            mock_collection,
            mock_reranker,
            "http://mock",
            docs_folder=tmp_path,
        )

        mock_run.assert_called_once()

        args, kwargs = mock_run.call_args
        # The latest user turn becomes the prompt...
        assert args[0] == "Question 2"

        # ...and everything before it is passed as message_history.
        history = kwargs["message_history"]
        assert len(history) == 3
        assert isinstance(history[0], ModelRequest)
        assert history[0].parts[0].content == "System prompt"
        assert isinstance(history[1], ModelRequest)
        assert history[1].parts[0].content == "Question 1"
        assert isinstance(history[2], ModelResponse)
        assert history[2].parts[0].content == "Answer 1"
import __version__ 10 | from .config import load_config, normalize_provider_defaults 11 | from .core.utils import console 12 | 13 | app = typer.Typer( 14 | name="agent-cli", 15 | help="A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.", 16 | add_completion=True, 17 | context_settings={"help_option_names": ["-h", "--help"]}, 18 | rich_markup_mode="markdown", 19 | ) 20 | 21 | 22 | def _version_callback(value: bool) -> None: 23 | if value: 24 | console.print(f"agent-cli {__version__}") 25 | raise typer.Exit 26 | 27 | 28 | @app.callback(invoke_without_command=True) 29 | def main( 30 | ctx: typer.Context, 31 | version: Annotated[ # noqa: ARG001 32 | bool, 33 | typer.Option( 34 | "-v", 35 | "--version", 36 | callback=_version_callback, 37 | is_eager=True, 38 | help="Show version and exit.", 39 | ), 40 | ] = False, 41 | ) -> None: 42 | """A suite of AI-powered tools.""" 43 | if ctx.invoked_subcommand is None: 44 | console.print("[bold red]No command specified.[/bold red]") 45 | console.print("[bold yellow]Running --help for your convenience.[/bold yellow]") 46 | console.print(ctx.get_help()) 47 | raise typer.Exit 48 | import dotenv # noqa: PLC0415 49 | 50 | dotenv.load_dotenv() 51 | print() 52 | 53 | 54 | def set_config_defaults(ctx: typer.Context, config_file: str | None) -> None: 55 | """Set the default values for the CLI based on the config file.""" 56 | config = load_config(config_file) 57 | wildcard_config = normalize_provider_defaults(config.get("defaults", {})) 58 | 59 | command_key = ctx.command.name or "" 60 | if not command_key: 61 | ctx.default_map = wildcard_config 62 | return 63 | 64 | # For nested subcommands (e.g., "memory proxy"), build "memory.proxy" 65 | if ctx.parent and ctx.parent.command.name and ctx.parent.command.name != "agent-cli": 66 | command_key = f"{ctx.parent.command.name}.{command_key}" 67 | 68 | command_config = normalize_provider_defaults(config.get(command_key, {})) 69 | 
ctx.default_map = {**wildcard_config, **command_config} 70 | 71 | 72 | # Import commands from other modules to register them 73 | from . import config_cmd # noqa: E402, F401 74 | from .agents import ( # noqa: E402, F401 75 | assistant, 76 | autocorrect, 77 | chat, 78 | memory, 79 | rag_proxy, 80 | server, 81 | speak, 82 | transcribe, 83 | voice_edit, 84 | ) 85 | from .install import hotkeys, services # noqa: E402, F401 86 | -------------------------------------------------------------------------------- /tests/agents/test_speak_e2e.py: -------------------------------------------------------------------------------- 1 | """End-to-end tests for the speak agent with simplified mocks.""" 2 | 3 | from __future__ import annotations 4 | 5 | from types import SimpleNamespace 6 | from unittest.mock import MagicMock, patch 7 | 8 | import pytest 9 | 10 | from agent_cli import config 11 | from agent_cli.agents.speak import _async_main 12 | from tests.mocks.audio import MockSoundDeviceStream 13 | from tests.mocks.wyoming import MockTTSClient 14 | 15 | 16 | @pytest.mark.asyncio 17 | @patch("agent_cli.services.tts.wyoming_client_context") 18 | @patch("agent_cli.agents.speak.setup_devices") 19 | @patch("agent_cli.services.tts.setup_output_stream") 20 | @patch("agent_cli.services.tts.open_audio_stream") 21 | async def test_speak_e2e( 22 | mock_open_audio_stream: MagicMock, 23 | mock_setup_output_stream: MagicMock, 24 | mock_setup_devices: MagicMock, 25 | mock_wyoming_client_context: MagicMock, 26 | ) -> None: 27 | """Test end-to-end speech synthesis with simplified mocks.""" 28 | # Setup mock stream 29 | mock_stream = MockSoundDeviceStream(output=True) 30 | mock_open_audio_stream.return_value.__enter__.return_value = mock_stream 31 | 32 | # Setup device info (input_index, input_name, output_index) 33 | mock_setup_devices.return_value = (None, None, 0) 34 | mock_setup_output_stream.return_value = SimpleNamespace(dtype="int16") 35 | 36 | # Setup mock Wyoming client 37 | 
@pytest.mark.asyncio
@patch("agent_cli.services.tts.wyoming_client_context")
@patch("agent_cli.agents.speak.setup_devices")
@patch("agent_cli.services.tts.setup_output_stream")
@patch("agent_cli.services.tts.open_audio_stream")
async def test_speak_e2e(
    mock_open_audio_stream: MagicMock,
    mock_setup_output_stream: MagicMock,
    mock_setup_devices: MagicMock,
    mock_wyoming_client_context: MagicMock,
) -> None:
    """Synthesize speech end-to-end against fully mocked audio and TTS backends."""
    # Fake output stream that records everything written to it.
    fake_stream = MockSoundDeviceStream(output=True)
    mock_open_audio_stream.return_value.__enter__.return_value = fake_stream

    # Device discovery returns (input_index, input_name, output_index).
    mock_setup_devices.return_value = (None, None, 0)
    mock_setup_output_stream.return_value = SimpleNamespace(dtype="int16")

    # Wyoming TTS client that yields canned audio bytes.
    fake_tts = MockTTSClient(b"fake audio data")
    mock_wyoming_client_context.return_value.__aenter__.return_value = fake_tts

    general_cfg = config.General(
        log_level="INFO",
        log_file=None,
        list_devices=False,
        quiet=False,
        clipboard=False,
        save_file=None,
    )
    provider_cfg = config.ProviderSelection(
        tts_provider="wyoming",
        asr_provider="wyoming",
        llm_provider="ollama",
    )
    audio_out_cfg = config.AudioOutput(enable_tts=True)
    wyoming_tts_cfg = config.WyomingTTS(
        tts_wyoming_ip="mock-host",
        tts_wyoming_port=10200,
    )
    openai_tts_cfg = config.OpenAITTS(tts_openai_model="tts-1", tts_openai_voice="alloy")
    kokoro_tts_cfg = config.KokoroTTS(
        tts_kokoro_model="tts-1",
        tts_kokoro_voice="alloy",
        tts_kokoro_host="http://localhost:8000/v1",
    )

    await _async_main(
        general_cfg=general_cfg,
        text="Hello, world!",
        provider_cfg=provider_cfg,
        audio_out_cfg=audio_out_cfg,
        wyoming_tts_cfg=wyoming_tts_cfg,
        openai_tts_cfg=openai_tts_cfg,
        kokoro_tts_cfg=kokoro_tts_cfg,
    )

    # One TTS round trip happened and audio actually reached the output stream.
    mock_wyoming_client_context.assert_called_once()
    assert fake_stream.get_written_data()
4 | 5 | ## Quick Platform Guide 6 | 7 | | Platform | Recommended Method | GPU Support | Performance | 8 | | ---------------- | ------------------------------ | ------------- | ----------- | 9 | | **macOS** | [Native Setup](macos.md) | ✅ Metal GPU | Best | 10 | | **Linux** | [Native Setup](linux.md) | ✅ NVIDIA GPU | Best | 11 | | **NixOS** | [System Integration](nixos.md) | ✅ NVIDIA GPU | Best | 12 | | **Any Platform** | [Docker Setup](docker.md) | ⚠️ Limited\* | Good | 13 | 14 | > **Note**: Docker on macOS does not support GPU acceleration. For best performance on Mac, use the native setup. 15 | 16 | ## Installation Methods 17 | 18 | ### 🍎 macOS Native (Recommended) 19 | 20 | **Best performance with Metal GPU acceleration** 21 | 22 | - Full GPU acceleration for Ollama 23 | - Optimized for Apple Silicon 24 | - Native macOS integrations 25 | 26 | 👉 [Follow macOS Setup Guide](macos.md) 27 | 28 | ### 🐧 Linux Native (Recommended) 29 | 30 | **Best performance with NVIDIA GPU acceleration** 31 | 32 | - NVIDIA GPU support 33 | - Full system integration 34 | - Optimal resource usage 35 | 36 | 👉 [Follow Linux Setup Guide](linux.md) 37 | 38 | ### ❄️ NixOS System Integration 39 | 40 | **Declarative system configuration with GPU support** 41 | 42 | - System-level service integration 43 | - Declarative configuration 44 | - Automatic service management 45 | 46 | 👉 [Follow NixOS Setup Guide](nixos.md) 47 | 48 | ### 🐳 Docker (Cross-platform) 49 | 50 | **Universal solution, some limitations** 51 | 52 | - Works on any platform 53 | - Consistent environment 54 | - ⚠️ No GPU acceleration on macOS 55 | - ⚠️ Limited GPU support on other platforms 56 | 57 | 👉 [Follow Docker Setup Guide](docker.md) 58 | 59 | ## What Gets Installed 60 | 61 | All installation methods set up these services: 62 | 63 | - **🧠 Ollama** - LLM server (gemma3:4b model) 64 | - **🎤 Wyoming Faster Whisper** - Speech-to-text 65 | - **🗣️ Wyoming Piper** - Text-to-speech 66 | - **👂 Wyoming OpenWakeWord** - Wake word 
detection 67 | 68 | ## Service Ports 69 | 70 | All methods use the same ports: 71 | 72 | - Ollama (LLM): `11434` 73 | - Whisper (ASR): `10300` 74 | - Piper (TTS): `10200` 75 | - OpenWakeWord: `10400` 76 | 77 | ## After Installation 78 | 79 | Once services are running, install the agent-cli package: 80 | 81 | ```bash 82 | # Using uv (recommended) 83 | uv tools install agent-cli 84 | 85 | # Using pip 86 | pip install agent-cli 87 | ``` 88 | 89 | Then test with: 90 | 91 | ```bash 92 | agent-cli autocorrect --help 93 | ``` 94 | 95 | ## Need Help? 96 | 97 | - Check the troubleshooting section in your chosen installation guide 98 | - Open an issue on [GitHub](https://github.com/basnijholt/agent-cli/issues) 99 | -------------------------------------------------------------------------------- /tests/core/test_sse.py: -------------------------------------------------------------------------------- 1 | """Tests for SSE formatting helpers.""" 2 | 3 | import json 4 | 5 | from agent_cli.core.sse import ( 6 | extract_content_from_chunk, 7 | format_chunk, 8 | format_done, 9 | parse_chunk, 10 | ) 11 | 12 | 13 | def test_format_chunk_with_content() -> None: 14 | """Test formatting a chunk with content.""" 15 | result = format_chunk("test-id", "gpt-4", content="Hello") 16 | assert result.startswith("data: ") 17 | assert result.endswith("\n\n") 18 | 19 | data = json.loads(result[6:]) 20 | assert data["id"] == "chatcmpl-test-id" 21 | assert data["object"] == "chat.completion.chunk" 22 | assert data["model"] == "gpt-4" 23 | assert data["choices"][0]["delta"]["content"] == "Hello" 24 | assert data["choices"][0]["finish_reason"] is None 25 | 26 | 27 | def test_format_chunk_finish() -> None: 28 | """Test formatting a finish chunk.""" 29 | result = format_chunk("test-id", "gpt-4", finish_reason="stop") 30 | data = json.loads(result[6:]) 31 | 32 | assert data["choices"][0]["delta"] == {} 33 | assert data["choices"][0]["finish_reason"] == "stop" 34 | 35 | 36 | def 
def test_format_chunk_with_extra() -> None:
    """Extra top-level fields are merged into the chunk payload."""
    extra = {"rag_sources": [{"path": "test.md"}]}
    payload = json.loads(format_chunk("test-id", "gpt-4", finish_reason="stop", extra=extra)[6:])
    assert payload["rag_sources"] == [{"path": "test.md"}]


def test_format_done() -> None:
    """The terminal SSE message is the literal [DONE] line."""
    assert format_done() == "data: [DONE]\n\n"


def test_parse_chunk_valid() -> None:
    """A well-formed data line parses into its JSON payload."""
    parsed = parse_chunk('data: {"choices": [{"delta": {"content": "Hi"}}]}')
    assert parsed is not None
    assert parsed["choices"][0]["delta"]["content"] == "Hi"


def test_parse_chunk_done() -> None:
    """The [DONE] sentinel parses to None."""
    assert parse_chunk("data: [DONE]") is None


def test_parse_chunk_invalid() -> None:
    """Non-data lines and malformed JSON both parse to None."""
    assert parse_chunk("not a data line") is None
    assert parse_chunk("data: {invalid json}") is None


def test_extract_content_from_chunk() -> None:
    """Content extraction handles content, empty deltas, and the text fallback."""
    assert extract_content_from_chunk({"choices": [{"delta": {"content": "Hello"}}]}) == "Hello"

    # Empty delta yields the empty string.
    chunk_empty: dict[str, list[dict[str, dict[str, str]]]] = {"choices": [{"delta": {}}]}
    assert extract_content_from_chunk(chunk_empty) == ""

    # Some providers use "text" instead of "content".
    assert extract_content_from_chunk({"choices": [{"delta": {"text": "World"}}]}) == "World"
from pydantic import BaseModel, ConfigDict, field_validator


class Message(BaseModel):
    """Chat message model."""

    # Sender role, e.g. "system" / "user" / "assistant" (not restricted here).
    role: str
    content: str


class ChatRequest(BaseModel):
    """Chat completion request model with long-term memory support."""

    # extra="allow" lets unknown OpenAI-compatible fields pass through the proxy.
    model_config = ConfigDict(extra="allow")

    model: str
    messages: list[Message]
    temperature: float | None = 0.7
    max_tokens: int | None = 1000
    stream: bool | None = False
    # Memory controls (all optional; None falls back to server defaults):
    memory_id: str | None = None  # conversation/namespace to read and write memories in
    memory_top_k: int | None = None  # max number of memories to retrieve
    memory_recency_weight: float | None = None  # how strongly newer memories are favored
    memory_score_threshold: float | None = None  # minimum relevance score to include


class MemoryEntry(BaseModel):
    """Stored memory entry."""

    role: str
    content: str
    created_at: str  # ISO-8601 timestamp string
    score: float | None = None  # retrieval relevance score, when available


class MemoryMetadata(BaseModel):
    """Metadata for a stored memory document."""

    conversation_id: str
    role: str
    created_at: str
    summary_kind: str | None = None  # set when the entry is an LLM summary
    replaced_by: str | None = None  # ID of the entry that superseded this one
    source_id: str | None = None  # ID of the original entry this was derived from


class SummaryOutput(BaseModel):
    """Structured summary returned by the LLM."""

    summary: str

    @field_validator("summary")
    @classmethod
    def _not_empty(cls, v: str) -> str:
        # Reject empty/whitespace-only summaries so the LLM retries instead of
        # silently storing a blank memory.
        if not v or not str(v).strip():
            msg = "field must be non-empty"
            raise ValueError(msg)
        return str(v).strip()


class StoredMemory(BaseModel):
    """Memory document as stored in the vector DB."""

    id: str
    content: str
    metadata: MemoryMetadata
    distance: float | None = None  # vector distance from the query, when retrieved
    embedding: list[float] | None = None  # raw embedding, when requested


class MemoryRetrieval(BaseModel):
    """Result of a memory retrieval operation."""

    entries: list[MemoryEntry]


class MemoryAdd(BaseModel):
    """Add a new memory fact."""

    event: Literal["ADD"] = "ADD"
    text: str
MemoryUpdate(BaseModel): 91 | """Update an existing memory fact.""" 92 | 93 | event: Literal["UPDATE"] = "UPDATE" 94 | id: int 95 | text: str 96 | 97 | 98 | class MemoryDelete(BaseModel): 99 | """Delete an existing memory fact.""" 100 | 101 | event: Literal["DELETE"] = "DELETE" 102 | id: int 103 | 104 | 105 | class MemoryIgnore(BaseModel): 106 | """Keep an existing memory as is.""" 107 | 108 | event: Literal["NONE"] = "NONE" 109 | id: int 110 | 111 | 112 | MemoryDecision = MemoryAdd | MemoryUpdate | MemoryDelete | MemoryIgnore 113 | -------------------------------------------------------------------------------- /scripts/setup-linux-hotkeys.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "⌨️ Setting up Linux hotkeys..." 6 | 7 | # Check if we're on Linux 8 | if [[ "$(uname)" != "Linux" ]]; then 9 | echo "❌ This script is for Linux only" 10 | exit 1 11 | fi 12 | 13 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 14 | 15 | TRANSCRIBE_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-transcription.sh" 16 | AUTOCORRECT_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-autocorrect.sh" 17 | VOICE_EDIT_SCRIPT="$SCRIPT_DIR/linux-hotkeys/toggle-voice-edit.sh" 18 | 19 | # Install notifications if missing 20 | echo "📢 Checking notifications..." 21 | if ! command -v notify-send &> /dev/null && ! command -v dunstify &> /dev/null; then 22 | echo "📦 Installing notification support..." 
23 | if command -v apt &> /dev/null; then 24 | sudo apt install -y libnotify-bin 25 | elif command -v dnf &> /dev/null; then 26 | sudo dnf install -y libnotify 27 | elif command -v pacman &> /dev/null; then 28 | sudo pacman -S --noconfirm libnotify 29 | elif command -v zypper &> /dev/null; then 30 | sudo zypper install -y libnotify-tools 31 | else 32 | echo "⚠️ Please install libnotify manually for your distribution" 33 | fi 34 | fi 35 | 36 | # Test notifications 37 | if command -v notify-send &> /dev/null; then 38 | notify-send "🎙️ Setup Complete" "Agent-CLI hotkeys ready!" || echo "⚠️ Notifications may not work in your environment" 39 | elif command -v dunstify &> /dev/null; then 40 | dunstify "🎙️ Setup Complete" "Agent-CLI hotkeys ready!" || echo "⚠️ Notifications may not work in your environment" 41 | fi 42 | 43 | echo "" 44 | echo "✅ Scripts ready! Add these hotkeys to your desktop environment:" 45 | echo "" 46 | echo "📋 Hotkey Bindings:" 47 | echo " Super+Shift+R → $TRANSCRIBE_SCRIPT" 48 | echo " Super+Shift+A → $AUTOCORRECT_SCRIPT" 49 | echo " Super+Shift+V → $VOICE_EDIT_SCRIPT" 50 | echo "" 51 | echo "🖥️ Configuration by Desktop Environment:" 52 | echo "" 53 | echo "Hyprland (~/.config/hypr/hyprland.conf):" 54 | echo " bind = SUPER SHIFT, R, exec, $TRANSCRIBE_SCRIPT" 55 | echo " bind = SUPER SHIFT, A, exec, $AUTOCORRECT_SCRIPT" 56 | echo " bind = SUPER SHIFT, V, exec, $VOICE_EDIT_SCRIPT" 57 | echo "" 58 | echo "Sway (~/.config/sway/config):" 59 | echo " bindsym \$mod+Shift+r exec $TRANSCRIBE_SCRIPT" 60 | echo " bindsym \$mod+Shift+a exec $AUTOCORRECT_SCRIPT" 61 | echo " bindsym \$mod+Shift+v exec $VOICE_EDIT_SCRIPT" 62 | echo "" 63 | echo "i3 (~/.config/i3/config):" 64 | echo " bindsym \$mod+Shift+r exec --no-startup-id $TRANSCRIBE_SCRIPT" 65 | echo " bindsym \$mod+Shift+a exec --no-startup-id $AUTOCORRECT_SCRIPT" 66 | echo " bindsym \$mod+Shift+v exec --no-startup-id $VOICE_EDIT_SCRIPT" 67 | echo "" 68 | echo "GNOME: Settings → Keyboard → View and 
def init_collection(
    persistence_path: Path,
    *,
    name: str,
    embedding_model: str = DEFAULT_OPENAI_EMBEDDING_MODEL,
    openai_base_url: str | None = None,
    openai_api_key: str | None = None,
    subdir: str | None = None,
) -> Collection:
    """Create or open a persistent Chroma collection with OpenAI-compatible embeddings."""
    base_path = persistence_path / subdir if subdir else persistence_path
    base_path.mkdir(parents=True, exist_ok=True)
    db_client = chromadb.PersistentClient(
        path=str(base_path),
        settings=Settings(anonymized_telemetry=False),
    )
    embedder = embedding_functions.OpenAIEmbeddingFunction(
        api_base=openai_base_url,
        api_key=openai_api_key or "dummy",  # the client rejects an empty key
        model_name=embedding_model,
    )
    return db_client.get_or_create_collection(name=name, embedding_function=embedder)
def flatten_metadatas(metadatas: Sequence[BaseModel]) -> list[dict[str, Any]]:
    """Serialize metadata models to JSON-safe dicts while preserving lists.

    ``mode="json"`` converts values (e.g. datetimes, enums) to JSON-compatible
    types, and ``exclude_none=True`` drops keys whose value is ``None`` so the
    store does not receive null metadata fields.
    """
    return [meta.model_dump(mode="json", exclude_none=True) for meta in metadatas]


def upsert(
    collection: Collection,
    *,
    ids: list[str],
    documents: list[str],
    metadatas: Sequence[BaseModel],
    batch_size: int = 10,
) -> None:
    """Upsert documents with JSON-serialized metadata.

    Args:
        collection: ChromaDB collection.
        ids: Document IDs.
        documents: Document contents.
        metadatas: Pydantic metadata models.
        batch_size: Max documents per embedding API call (default: 10).

    Raises:
        ValueError: If ``ids``, ``documents``, and ``metadatas`` have
            mismatched lengths, or if ``batch_size`` is not positive.

    """
    if not ids:
        return
    if not (len(ids) == len(documents) == len(metadatas)):
        msg = (
            f"Mismatched input lengths: {len(ids)} ids, "
            f"{len(documents)} documents, {len(metadatas)} metadatas."
        )
        raise ValueError(msg)
    if batch_size < 1:
        # A non-positive step would either raise an obscure range() error (0)
        # or silently skip every batch (negative), dropping all documents.
        msg = f"batch_size must be a positive integer, got {batch_size}."
        raise ValueError(msg)
    serialized = flatten_metadatas(metadatas)

    # Process in batches to avoid overwhelming the embedding service.
    for start in range(0, len(ids), batch_size):
        end = start + batch_size
        collection.upsert(
            ids=ids[start:end],
            documents=documents[start:end],
            metadatas=serialized[start:end],
        )


def delete(collection: Collection, ids: list[str]) -> None:
    """Delete documents by ID; a no-op for an empty list."""
    if ids:
        collection.delete(ids=ids)


def delete_where(collection: Collection, where: Mapping[str, Any]) -> None:
    """Delete all documents matching a Chroma ``where`` metadata filter."""
    collection.delete(where=where)
tests.""" 17 | 18 | def __init__(self) -> None: 19 | """Initialize in-memory store.""" 20 | self.docs: list[dict[str, Any]] = [] 21 | 22 | def upsert(self, ids: list[str], documents: list[str], metadatas: list[dict[str, Any]]) -> None: 23 | """Mimic Chroma upsert.""" 24 | for entry_id, doc, meta in zip(ids, documents, metadatas, strict=False): 25 | self.docs.append({"id": entry_id, "document": doc, "metadata": meta}) 26 | 27 | def get(self, where: dict[str, Any], include: list[str] | None = None) -> dict[str, Any]: 28 | """Mimic filtered get.""" 29 | _ = include 30 | 31 | def matches(entry: dict[str, Any]) -> bool: 32 | meta = entry["metadata"] 33 | 34 | def match_clause(clause: dict[str, Any]) -> bool: 35 | for key, value in clause.items(): 36 | if isinstance(value, dict) and "$ne" in value: 37 | if meta.get(key) == value["$ne"]: 38 | return False 39 | elif meta.get(key) != value: 40 | return False 41 | return True 42 | 43 | # Support simple dict or {"$and": [ ... ]} 44 | if "$and" in where: 45 | return all(match_clause(cl) for cl in where["$and"]) 46 | return match_clause(where) 47 | 48 | filtered = [entry for entry in self.docs if matches(entry)] 49 | return { 50 | "documents": [e["document"] for e in filtered], 51 | "metadatas": [e["metadata"] for e in filtered], 52 | "ids": [e["id"] for e in filtered], 53 | } 54 | 55 | def delete(self, ids: list[str]) -> None: 56 | """Mimic delete by IDs.""" 57 | self.docs = [entry for entry in self.docs if entry["id"] not in ids] 58 | 59 | 60 | def test_evict_if_needed_removes_oldest(tmp_path: Path) -> None: 61 | collection = FakeCollection() 62 | base_meta = {"conversation_id": "c1", "role": "memory"} 63 | collection.upsert( 64 | ids=["old", "mid", "new"], 65 | documents=["old doc", "mid doc", "new doc"], 66 | metadatas=[ 67 | {**base_meta, "created_at": "2024-01-01T00:00:00Z"}, 68 | {**base_meta, "created_at": "2024-06-01T00:00:00Z"}, 69 | {**base_meta, "created_at": "2024-12-01T00:00:00Z"}, 70 | ], 71 | ) 72 | 73 | with 
patch("agent_cli.memory._ingest.delete_memory_files"): 74 | evict_if_needed(collection, tmp_path, "c1", max_entries=2) 75 | 76 | remaining = list_conversation_entries(collection, "c1") 77 | remaining_ids = {e.id for e in remaining} 78 | assert remaining_ids == {"mid", "new"} 79 | -------------------------------------------------------------------------------- /tests/mocks/wyoming.py: -------------------------------------------------------------------------------- 1 | """Mock Wyoming servers and clients for testing.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Self 6 | 7 | from wyoming.asr import Transcript 8 | from wyoming.audio import AudioChunk, AudioStart, AudioStop 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import AsyncGenerator 12 | 13 | from wyoming.event import Event 14 | 15 | 16 | class MockWyomingClient: 17 | """Base class for mock Wyoming clients.""" 18 | 19 | def __init__(self) -> None: 20 | """Initialize mock client.""" 21 | self.events_written: list[Event] = [] 22 | self.is_active = True 23 | 24 | async def write_event(self, event: Event) -> None: 25 | """Mock writing an event.""" 26 | if self.is_active: 27 | self.events_written.append(event) 28 | 29 | async def read_event(self) -> Event | None: 30 | """Mock reading an event.""" 31 | raise NotImplementedError 32 | 33 | async def __aenter__(self) -> Self: 34 | """Async context manager entry.""" 35 | return self 36 | 37 | async def __aexit__(self, *args: object) -> None: 38 | """Async context manager exit.""" 39 | self.is_active = False 40 | 41 | 42 | class MockASRClient(MockWyomingClient): 43 | """Mock Wyoming ASR client for testing transcription.""" 44 | 45 | def __init__(self, transcript_text: str) -> None: 46 | """Initialize mock ASR client.""" 47 | super().__init__() 48 | self.transcript_text = transcript_text 49 | self._event_generator = self._generate_events() 50 | 51 | async def read_event(self) -> Event | None: 52 | """Mock reading events from 
class MockASRClient(MockWyomingClient):
    """Mock Wyoming ASR client for testing transcription."""

    def __init__(self, transcript_text: str) -> None:
        """Store the canned transcript and prime the event stream."""
        super().__init__()
        self.transcript_text = transcript_text
        self._event_generator = self._generate_events()

    async def read_event(self) -> Event | None:
        """Return the next canned server event, or ``None`` when exhausted."""
        try:
            return await anext(self._event_generator)
        except StopAsyncIteration:
            return None

    async def _generate_events(self) -> AsyncGenerator[Event, None]:
        """Yield a single transcript event."""
        yield Transcript(text=self.transcript_text).event()


class MockTTSClient(MockWyomingClient):
    """Mock Wyoming TTS client for testing speech synthesis."""

    def __init__(self, audio_data: bytes) -> None:
        """Store the canned audio payload and prime the event stream."""
        super().__init__()
        self.audio_data = audio_data
        self._event_generator = self._generate_events()

    async def read_event(self) -> Event | None:
        """Return the next canned server event, or ``None`` when exhausted."""
        try:
            return await anext(self._event_generator)
        except StopAsyncIteration:
            return None

    async def _generate_events(self) -> AsyncGenerator[Event, None]:
        """Yield a start/chunk/stop audio sequence (16-bit mono at 22.05 kHz)."""
        yield AudioStart(rate=22050, width=2, channels=1).event()
        yield AudioChunk(
            rate=22050,
            width=2,
            channels=1,
            audio=self.audio_data,
        ).event()
        yield AudioStop().event()

    async def connect(self) -> None:
        """Mock connect."""

    async def disconnect(self) -> None:
        """Mock disconnect."""
@app.command("install-services", rich_help_panel="Installation")
def install_services() -> None:
    """Install all required services (Ollama, Whisper, Piper, OpenWakeWord).

    This command installs:
    - Ollama (local LLM server)
    - Wyoming Faster Whisper (speech-to-text)
    - Wyoming Piper (text-to-speech)
    - Wyoming OpenWakeWord (wake word detection)

    The appropriate installation method is used based on your operating system.
    """
    # Pick the setup script matching the current OS, then delegate the heavy
    # lifting (execution, error reporting, next-steps hints) to the helper.
    execute_installation_script(
        script_name=get_platform_script("setup-macos.sh", "setup-linux.sh"),
        operation_name="Install services",
        success_message="Services installed successfully!",
        next_steps=[
            "Start services: agent-cli start-services",
            "Set up hotkeys: agent-cli install-hotkeys",
        ],
    )


@app.command("start-services", rich_help_panel="Service Management")
def start_services(
    attach: bool = typer.Option(
        True,  # noqa: FBT003
        "--attach/--no-attach",
        help="Attach to Zellij session after starting",
    ),
) -> None:
    """Start all agent-cli services in a Zellij session.

    This starts:
    - Ollama (LLM server)
    - Wyoming Faster Whisper (speech-to-text)
    - Wyoming Piper (text-to-speech)
    - Wyoming OpenWakeWord (wake word detection)

    Services run in a Zellij terminal multiplexer session named 'agent-cli'.
    Use Ctrl-Q to quit or Ctrl-O d to detach from the session.
    """
    try:
        script_path = get_script_path("start-all-services.sh")
    except FileNotFoundError as e:
        print_error_message("Service scripts not found")
        console.print(str(e))
        raise typer.Exit(1) from None

    # The launcher script honors this variable to skip attaching to Zellij.
    child_env = os.environ.copy()
    if not attach:
        child_env["AGENT_CLI_NO_ATTACH"] = "true"

    try:
        subprocess.run([str(script_path)], check=True, env=child_env)
    except subprocess.CalledProcessError as e:
        print_error_message(f"Failed to start services. Exit code: {e.returncode}")
        raise typer.Exit(e.returncode) from None

    if attach:
        # If we get here with attach=True, user likely detached
        print_with_style("\n👋 Detached from Zellij session.")
        print_with_style(
            "Services are still running. Use 'zellij attach agent-cli' to reattach.",
        )
    else:
        print_with_style("✅ Services started in background.", "green")
        print_with_style("Run 'zellij attach agent-cli' to view the session.", "yellow")
@patch("agent_cli.agents.transcribe.process.kill_process")
def test_transcribe_stop(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag."""
    mock_kill_process.return_value = True
    result = runner.invoke(app, ["transcribe", "--stop"])
    mock_kill_process.assert_called_once_with("transcribe")
    assert result.exit_code == 0
    assert "Transcribe stopped" in result.stdout


@patch("agent_cli.agents.transcribe.process.kill_process")
def test_transcribe_stop_not_running(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag when the process is not running."""
    mock_kill_process.return_value = False
    result = runner.invoke(app, ["transcribe", "--stop"])
    assert result.exit_code == 0
    assert "No transcribe is running" in result.stdout


@patch("agent_cli.agents.transcribe.process.is_process_running")
def test_transcribe_status_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is running."""
    mock_is_process_running.return_value = True
    # A PID must be readable for the status report to show a running process.
    with patch("agent_cli.agents.transcribe.process.read_pid_file", return_value=123):
        result = runner.invoke(app, ["transcribe", "--status"])
    assert result.exit_code == 0
    assert "Transcribe is running" in result.stdout


@patch("agent_cli.agents.transcribe.process.is_process_running")
def test_transcribe_status_not_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is not running."""
    mock_is_process_running.return_value = False
    result = runner.invoke(app, ["transcribe", "--status"])
    assert result.exit_code == 0
    assert "Transcribe is not running" in result.stdout
def get_script_path(script_name: str) -> Path:
    """Get the path to a script in the scripts directory."""
    return _script_directory() / script_name


def _run_script(script_path: Path) -> CompletedProcess[bytes]:
    """Run a shell script, streaming its output directly to the terminal."""
    if not script_path.exists():
        msg = f"Script not found: {script_path}"
        raise FileNotFoundError(msg)

    # Run the script through the shell, which handles execution permissions
    # This avoids modifying file permissions in the package directory
    return subprocess.run(
        ["bash", str(script_path)],  # noqa: S607
        check=True,
        cwd=script_path.parent,
    )


def get_platform_script(macos_script: str, linux_script: str) -> str:
    """Get the appropriate script name based on the platform."""
    per_platform = {"darwin": macos_script, "linux": linux_script}
    system = platform.system().lower()
    try:
        return per_platform[system]
    except KeyError:
        print_error_message(f"Unsupported operating system: {system}")
        raise typer.Exit(1) from None


def execute_installation_script(
    script_name: str,
    operation_name: str,
    success_message: str,
    next_steps: list[str] | None = None,
) -> None:
    """Execute an installation script with standard error handling."""
    script_path = get_script_path(script_name)
    print_with_style(f"🚀 Running {script_name} to {operation_name.lower()}...", "green")

    try:
        _run_script(script_path)
    except FileNotFoundError as e:
        # This case is for when the script file itself is not found
        print_error_message(f"{operation_name} failed: {e}")
        raise typer.Exit(1) from None
    except subprocess.CalledProcessError as e:
        # This case handles non-zero exit codes from the script
        print_error_message(f"{operation_name} failed with exit code {e.returncode}")
        raise typer.Exit(e.returncode) from None

    print_with_style(f"✅ {success_message}", "green")
    if next_steps:
        print_with_style("\nNext steps:", "yellow")
        for i, step in enumerate(next_steps, 1):
            print_with_style(f" {i}. {step}", "cyan")
@patch("agent_cli.agents.voice_edit.process.kill_process")
def test_voice_edit_stop(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag."""
    mock_kill_process.return_value = True
    result = runner.invoke(app, ["voice-edit", "--stop"])
    mock_kill_process.assert_called_once_with("voice-edit")
    assert result.exit_code == 0
    assert "Voice assistant stopped" in result.stdout


@patch("agent_cli.agents.voice_edit.process.kill_process")
def test_voice_edit_stop_not_running(mock_kill_process: MagicMock) -> None:
    """Test the --stop flag when the process is not running."""
    mock_kill_process.return_value = False
    result = runner.invoke(app, ["voice-edit", "--stop"])
    assert result.exit_code == 0
    assert "No voice assistant is running" in result.stdout


@patch("agent_cli.agents.voice_edit.process.is_process_running")
def test_voice_edit_status_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is running."""
    mock_is_process_running.return_value = True
    # A readable PID file is required for the "running" status report.
    with patch(
        "agent_cli.agents.voice_edit.process.read_pid_file",
        return_value=123,
    ):
        result = runner.invoke(app, ["voice-edit", "--status"])
    assert result.exit_code == 0
    assert "Voice assistant is running" in result.stdout


@patch("agent_cli.agents.voice_edit.process.is_process_running")
def test_voice_edit_status_not_running(mock_is_process_running: MagicMock) -> None:
    """Test the --status flag when the process is not running."""
    mock_is_process_running.return_value = False
    result = runner.invoke(app, ["voice-edit", "--status"])
    assert result.exit_code == 0
    assert "Voice assistant is not running" in result.stdout
def _get_openai_client(api_key: str | None, base_url: str | None = None) -> AsyncOpenAI:
    """Get an OpenAI client instance.

    For custom endpoints (base_url is set), API key is optional and a dummy value
    is used if not provided, since custom endpoints may not require authentication.
    """
    from openai import AsyncOpenAI  # noqa: PLC0415

    # Fall back to a placeholder key so key-less custom endpoints still work.
    return AsyncOpenAI(api_key=api_key or "dummy-api-key", base_url=base_url)
async def transcribe_audio_openai(
    audio_data: bytes,
    openai_asr_cfg: config.OpenAIASR,
    logger: logging.Logger,
    **_kwargs: object,  # Accept extra kwargs for consistency with Wyoming
) -> str:
    """Transcribe audio using OpenAI's Whisper API or a compatible endpoint.

    When openai_base_url is set, uses the custom endpoint instead of the official OpenAI API.
    This allows using self-hosted Whisper models or other compatible services.
    """
    base_url = openai_asr_cfg.openai_base_url
    if base_url:
        logger.info(
            "Transcribing audio with custom OpenAI-compatible endpoint: %s",
            base_url,
        )
    else:
        logger.info("Transcribing audio with OpenAI Whisper...")
        # Only the official API strictly requires a key; custom endpoints may not.
        if not openai_asr_cfg.openai_api_key:
            msg = "OpenAI API key is not set."
            raise ValueError(msg)

    api_client = _get_openai_client(
        api_key=openai_asr_cfg.openai_api_key,
        base_url=base_url,
    )
    # The API infers the container format from the file name on the buffer.
    wav_buffer = io.BytesIO(audio_data)
    wav_buffer.name = "audio.wav"

    request_kwargs = {"model": openai_asr_cfg.asr_openai_model, "file": wav_buffer}
    if openai_asr_cfg.asr_openai_prompt:
        request_kwargs["prompt"] = openai_asr_cfg.asr_openai_prompt

    response = await api_client.audio.transcriptions.create(**request_kwargs)
    return response.text


async def synthesize_speech_openai(
    text: str,
    openai_tts_cfg: config.OpenAITTS,
    logger: logging.Logger,
) -> bytes:
    """Synthesize speech using OpenAI's TTS API or a compatible endpoint."""
    base_url = openai_tts_cfg.tts_openai_base_url
    if base_url:
        logger.info(
            "Synthesizing speech with custom OpenAI-compatible endpoint: %s",
            base_url,
        )
    else:
        logger.info("Synthesizing speech with OpenAI TTS...")
        # Only the official API strictly requires a key; custom endpoints may not.
        if not openai_tts_cfg.openai_api_key:
            msg = "OpenAI API key is not set."
            raise ValueError(msg)

    api_client = _get_openai_client(
        api_key=openai_tts_cfg.openai_api_key,
        base_url=base_url,
    )
    response = await api_client.audio.speech.create(
        model=openai_tts_cfg.tts_openai_model,
        voice=openai_tts_cfg.tts_openai_voice,
        input=text,
        response_format="wav",
    )
    return response.content
def test_convert_audio_arguments() -> None:
    """Regression test: Ensure subprocess.run is called with text=False."""
    with (
        patch("shutil.which", return_value="/usr/bin/ffmpeg"),
        patch("subprocess.run") as run_mock,
        patch("pathlib.Path.read_bytes", return_value=b"converted_data"),
        patch("pathlib.Path.unlink"),  # Prevent file deletion issues on Windows
    ):
        # Simulate a successful ffmpeg invocation.
        run_mock.return_value = MagicMock(returncode=0, stdout=b"", stderr=b"")

        audio_format.convert_audio_to_wyoming_format(b"input_data", "test.mp3")

    # Check that subprocess.run was called
    assert run_mock.called
    call = run_mock.call_args

    # CRITICAL: Verify text=False (or not present/True) was passed
    # The fix explicitly sets text=False
    assert call.kwargs.get("text") is False, "subprocess.run must be called with text=False"

    # Verify command structure
    cmd = call.args[0]
    assert cmd[0] == "ffmpeg"
    assert "-f" in cmd
    assert "s16le" in cmd
sample) 93 | # Check if output seems reasonable (non-empty) 94 | assert len(converted) > 0 95 | 96 | # Ideally check if it's multiple of 2 (16-bit) 97 | assert len(converted) % 2 == 0 98 | 99 | except RuntimeError as e: 100 | pytest.fail(f"Conversion failed: {e}") 101 | -------------------------------------------------------------------------------- /docs/installation/docker.md: -------------------------------------------------------------------------------- 1 | # Docker Installation 2 | 3 | Universal Docker setup that works on any platform with Docker support. 4 | 5 | > **⚠️ Important Limitations** 6 | > 7 | > - **macOS**: Docker does not support GPU acceleration. For 10x better performance, use [macOS native setup](macos.md) 8 | > - **Linux**: Limited GPU support. For full NVIDIA GPU acceleration, use [Linux native setup](linux.md) 9 | > - **Ollama on macOS**: Can be memory-intensive without GPU acceleration 10 | 11 | ## Prerequisites 12 | 13 | - Docker and Docker Compose installed 14 | - At least 8GB RAM available for Docker 15 | - 10GB free disk space 16 | 17 | ## Quick Start 18 | 19 | 1. **Start the services:** 20 | 21 | ```bash 22 | docker compose -f docker/docker-compose.yml up --build 23 | ``` 24 | 25 | 2. **Check if services are running:** 26 | 27 | ```bash 28 | docker compose -f docker/docker-compose.yml logs 29 | ``` 30 | 31 | You should see logs from all services, with Ollama downloading the `gemma3:4b` model. 32 | 33 | 3. **Install agent-cli:** 34 | 35 | ```bash 36 | uv tools install agent-cli 37 | # or: pip install agent-cli 38 | ``` 39 | 40 | 4. 
**Test the setup:** 41 | ```bash 42 | agent-cli autocorrect "this has an eror" 43 | ``` 44 | 45 | ## Services Overview 46 | 47 | The Docker setup provides: 48 | 49 | | Service | Image | Port | Purpose | 50 | | ---------------- | ---------------------------- | ----- | -------------------------- | 51 | | **ollama** | Custom build | 11434 | LLM server with gemma3:4b | 52 | | **whisper** | rhasspy/wyoming-whisper | 10300 | Speech-to-text (large-v3) | 53 | | **piper** | rhasspy/wyoming-piper | 10200 | Text-to-speech (ryan-high) | 54 | | **openwakeword** | rhasspy/wyoming-openwakeword | 10400 | Wake word detection | 55 | 56 | ## Configuration Files 57 | 58 | The Docker setup uses: 59 | 60 | - `docker/docker-compose.yml` - Service orchestration 61 | - `docker/Dockerfile` - Custom Ollama container 62 | - Data volumes for model persistence 63 | 64 | ## Important Limitations 65 | 66 | - **macOS**: No GPU acceleration (10x slower than native) 67 | - **Linux**: Limited GPU support 68 | - **Memory**: Requires 8GB+ RAM for smooth operation 69 | 70 | ## Managing Services 71 | 72 | ```bash 73 | # Start services in background 74 | docker compose -f docker/docker-compose.yml up -d 75 | 76 | # Stop services 77 | docker compose -f docker/docker-compose.yml down 78 | 79 | # View logs 80 | docker compose -f docker/docker-compose.yml logs -f 81 | 82 | # Restart a specific service 83 | docker compose -f docker/docker-compose.yml restart ollama 84 | ``` 85 | 86 | ## Data Persistence 87 | 88 | Services store data in local directories: 89 | 90 | - `./ollama/` - Ollama models and config 91 | - `./whisper-data/` - Whisper models 92 | - `./piper-data/` - Piper voice models 93 | - `./openwakeword-data/` - Wake word models 94 | 95 | ## Troubleshooting 96 | 97 | ### Common Issues 98 | 99 | - **Slow performance**: Use native setup for better performance 100 | - **Memory issues**: Increase Docker memory allocation to 8GB+ 101 | - **Port conflicts**: Change port mappings in 
`docker/docker-compose.yml` 102 | 103 | ## Alternative: Native Installation 104 | 105 | For better performance, consider platform-specific native installation: 106 | 107 | - [macOS Native Setup](macos.md) - Metal GPU acceleration 108 | - [Linux Native Setup](linux.md) - NVIDIA GPU acceleration 109 | -------------------------------------------------------------------------------- /tests/agents/test_transcribe_e2e.py: -------------------------------------------------------------------------------- 1 | """End-to-end tests for the transcribe agent with minimal mocking.""" 2 | 3 | from __future__ import annotations 4 | 5 | import asyncio 6 | from typing import TYPE_CHECKING 7 | from unittest.mock import MagicMock, patch 8 | 9 | import pytest 10 | 11 | from agent_cli import config 12 | from agent_cli.agents.transcribe import _async_main 13 | from tests.mocks.audio import MockSoundDeviceStream 14 | from tests.mocks.wyoming import MockASRClient 15 | 16 | if TYPE_CHECKING: 17 | from rich.console import Console 18 | 19 | 20 | @pytest.mark.asyncio 21 | @patch("agent_cli.agents.transcribe.signal_handling_context") 22 | @patch("agent_cli.services.asr.wyoming_client_context") 23 | @patch("agent_cli.services.asr.open_audio_stream") 24 | @patch("agent_cli.services.asr.setup_input_stream") 25 | async def test_transcribe_e2e( 26 | mock_setup_input_stream: MagicMock, 27 | mock_open_audio_stream: MagicMock, 28 | mock_wyoming_client_context: MagicMock, 29 | mock_signal_handling_context: MagicMock, 30 | mock_console: Console, 31 | ) -> None: 32 | """Test end-to-end transcription with simplified mocks.""" 33 | # Setup mock stream 34 | mock_stream = MockSoundDeviceStream(input=True) 35 | mock_open_audio_stream.return_value.__enter__.return_value = mock_stream 36 | mock_setup_input_stream.return_value = {"dtype": "int16"} 37 | 38 | # Setup mock Wyoming client 39 | transcript_text = "This is a test transcription." 
40 | mock_asr_client = MockASRClient(transcript_text) 41 | mock_wyoming_client_context.return_value.__aenter__.return_value = mock_asr_client 42 | 43 | # Setup stop event 44 | stop_event = asyncio.Event() 45 | mock_signal_handling_context.return_value.__enter__.return_value = stop_event 46 | asyncio.get_event_loop().call_later(0.1, stop_event.set) 47 | 48 | provider_cfg = config.ProviderSelection( 49 | asr_provider="wyoming", 50 | llm_provider="ollama", 51 | tts_provider="wyoming", 52 | ) 53 | general_cfg = config.General( 54 | log_level="INFO", 55 | log_file=None, 56 | quiet=False, 57 | list_devices=False, 58 | clipboard=False, 59 | ) 60 | audio_in_cfg = config.AudioInput(input_device_index=0) 61 | wyoming_asr_cfg = config.WyomingASR(asr_wyoming_ip="mock-host", asr_wyoming_port=10300) 62 | openai_asr_cfg = config.OpenAIASR(asr_openai_model="whisper-1") 63 | ollama_cfg = config.Ollama(llm_ollama_model="", llm_ollama_host="") 64 | openai_llm_cfg = config.OpenAILLM(llm_openai_model="", openai_base_url=None) 65 | gemini_llm_cfg = config.GeminiLLM( 66 | llm_gemini_model="gemini-1.5-flash", 67 | gemini_api_key="test-key", 68 | ) 69 | 70 | with patch("agent_cli.core.utils.console", mock_console): 71 | await _async_main( 72 | extra_instructions=None, 73 | provider_cfg=provider_cfg, 74 | general_cfg=general_cfg, 75 | audio_in_cfg=audio_in_cfg, 76 | wyoming_asr_cfg=wyoming_asr_cfg, 77 | openai_asr_cfg=openai_asr_cfg, 78 | ollama_cfg=ollama_cfg, 79 | openai_llm_cfg=openai_llm_cfg, 80 | gemini_llm_cfg=gemini_llm_cfg, 81 | llm_enabled=False, 82 | transcription_log=None, 83 | save_recording=False, 84 | ) 85 | 86 | # Assert that the final transcript is in the console output 87 | output = mock_console.file.getvalue() 88 | assert transcript_text in output 89 | 90 | # Ensure the mock client was used 91 | mock_wyoming_client_context.assert_called_once() 92 | -------------------------------------------------------------------------------- /tests/rag/test_rag_proxy_passthrough.py: 
-------------------------------------------------------------------------------- 1 | """Tests for the RAG proxy passthrough functionality.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import AsyncMock, Mock 7 | 8 | import pytest 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.rag.api import create_app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | from pytest_mock import MockerFixture 17 | 18 | 19 | @pytest.fixture 20 | def mock_rag_dependencies(mocker: MockerFixture) -> None: 21 | """Mock the RAG dependencies to avoid side effects.""" 22 | mocker.patch("agent_cli.rag.api.init_collection") 23 | mocker.patch("agent_cli.rag.api.get_reranker_model") 24 | mocker.patch("agent_cli.rag.api.load_hashes_from_metadata", return_value=({}, {})) 25 | mocker.patch("agent_cli.rag.api.watch_docs") 26 | mocker.patch("agent_cli.rag.api.initial_index") 27 | # Also mock threading to prevent background threads 28 | mocker.patch("threading.Thread") 29 | 30 | 31 | @pytest.fixture 32 | def app(tmp_path: Path, mock_rag_dependencies: None) -> TestClient: # noqa: ARG001 33 | """Create the FastAPI app with mocked dependencies.""" 34 | fastapi_app = create_app( 35 | docs_folder=tmp_path / "docs", 36 | chroma_path=tmp_path / "chroma", 37 | openai_base_url="http://upstream.test/v1", 38 | chat_api_key="dummy-rag-key", 39 | ) 40 | return TestClient(fastapi_app) 41 | 42 | 43 | def test_rag_proxy_passthrough_models(app: TestClient, mocker: MockerFixture) -> None: 44 | """Test that /v1/models is forwarded to the upstream.""" 45 | mock_send = AsyncMock() 46 | mock_response = Mock() 47 | mock_response.status_code = 200 48 | mock_response.content = b'{"data": [{"id": "gpt-4"}]}' 49 | mock_response.headers = {"Content-Type": "application/json"} 50 | mock_send.return_value = mock_response 51 | 52 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 53 | 54 | response = 
app.get("/v1/models") 55 | 56 | assert response.status_code == 200 57 | assert response.json() == {"data": [{"id": "gpt-4"}]} 58 | 59 | assert mock_send.call_count == 1 60 | request_obj = mock_send.call_args[0][0] 61 | 62 | assert str(request_obj.url) == "http://upstream.test/v1/models" 63 | assert request_obj.method == "GET" 64 | # Ensure correct Auth header from RAG config 65 | assert request_obj.headers["Authorization"] == "Bearer dummy-rag-key" 66 | 67 | 68 | def test_rag_proxy_passthrough_catchall(app: TestClient, mocker: MockerFixture) -> None: 69 | """Test that an arbitrary path is forwarded.""" 70 | mock_send = AsyncMock() 71 | mock_response = Mock() 72 | mock_response.status_code = 200 73 | mock_response.content = b"OK" 74 | mock_response.headers = {"Content-Type": "text/plain"} 75 | mock_send.return_value = mock_response 76 | 77 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 78 | 79 | response = app.post("/custom/endpoint", content=b"data") 80 | 81 | assert response.status_code == 200 82 | assert response.content == b"OK" 83 | 84 | assert mock_send.call_count == 1 85 | request_obj = mock_send.call_args[0][0] 86 | assert str(request_obj.url) == "http://upstream.test/v1/custom/endpoint" 87 | 88 | 89 | def test_rag_proxy_passthrough_upstream_error(app: TestClient, mocker: MockerFixture) -> None: 90 | """Test handling of upstream errors.""" 91 | mock_send = AsyncMock() 92 | mock_send.side_effect = Exception("Network error") 93 | 94 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 95 | 96 | response = app.get("/v1/models") 97 | 98 | assert response.status_code == 502 99 | assert response.content == b"Upstream Proxy Error" 100 | -------------------------------------------------------------------------------- /tests/rag/test_indexer.py: -------------------------------------------------------------------------------- 1 | """Tests for RAG indexer.""" 2 | 3 | from collections.abc import AsyncGenerator 4 | from pathlib import Path 5 | 
from typing import Any 6 | from unittest.mock import MagicMock, patch 7 | 8 | import pytest 9 | from watchfiles import Change 10 | 11 | from agent_cli.rag import _indexer 12 | from agent_cli.rag._utils import should_ignore_path 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_watch_docs(tmp_path: Path) -> None: 17 | """Test watching docs folder.""" 18 | mock_collection = MagicMock() 19 | docs_folder = tmp_path / "docs" 20 | docs_folder.mkdir() 21 | file_hashes: dict[str, str] = {} 22 | file_mtimes: dict[str, float] = {} 23 | 24 | # Create dummy files so is_file() returns True 25 | (docs_folder / "new.txt").touch() 26 | (docs_folder / "mod.txt").touch() 27 | # del.txt doesn't need to exist 28 | 29 | # Mock awatch to yield changes 30 | changes = { 31 | (Change.added, str(docs_folder / "new.txt")), 32 | (Change.modified, str(docs_folder / "mod.txt")), 33 | (Change.deleted, str(docs_folder / "del.txt")), 34 | } 35 | 36 | async def mock_awatch_gen( 37 | *_args: Any, 38 | **_kwargs: Any, 39 | ) -> AsyncGenerator[set[tuple[Change, str]], None]: 40 | yield changes 41 | 42 | async def fake_watch_directory(_root: Path, handler: Any, **_kwargs) -> None: # type: ignore[no-untyped-def] 43 | for change, path in changes: 44 | handler(change, Path(path)) 45 | 46 | with ( 47 | patch("agent_cli.rag._indexer.watch_directory", side_effect=fake_watch_directory), 48 | patch("agent_cli.rag._indexer.index_file") as mock_index, 49 | patch("agent_cli.rag._indexer.remove_file") as mock_remove, 50 | ): 51 | await _indexer.watch_docs(mock_collection, docs_folder, file_hashes, file_mtimes) 52 | 53 | # Check calls 54 | assert mock_index.call_count == 2 # added and modified 55 | assert mock_remove.call_count == 1 # deleted 56 | 57 | 58 | @pytest.mark.asyncio 59 | async def test_watch_docs_passes_ignore_filter(tmp_path: Path) -> None: 60 | """Test that watch_docs passes the should_ignore_path filter to watch_directory.""" 61 | mock_collection = MagicMock() 62 | docs_folder = tmp_path / 
"docs" 63 | docs_folder.mkdir() 64 | file_hashes: dict[str, str] = {} 65 | file_mtimes: dict[str, float] = {} 66 | 67 | async def fake_watch_directory( 68 | _root: Path, 69 | _handler: Any, 70 | *, 71 | ignore_filter: Any = None, 72 | **_kwargs: Any, 73 | ) -> None: 74 | # Verify ignore_filter is provided and is the should_ignore_path function 75 | assert ignore_filter is not None 76 | assert ignore_filter.__name__ == "should_ignore_path" 77 | 78 | with patch( 79 | "agent_cli.rag._indexer.watch_directory", 80 | side_effect=fake_watch_directory, 81 | ): 82 | await _indexer.watch_docs(mock_collection, docs_folder, file_hashes, file_mtimes) 83 | 84 | 85 | @pytest.mark.asyncio 86 | async def test_watch_docs_ignore_filter_works(tmp_path: Path) -> None: 87 | """Test that the ignore filter correctly filters out ignored paths.""" 88 | docs_folder = tmp_path / "docs" 89 | docs_folder.mkdir() 90 | 91 | # Test that the filter correctly identifies paths to ignore 92 | git_file = docs_folder / ".git" / "config" 93 | venv_file = docs_folder / "venv" / "bin" / "python" 94 | pycache_file = docs_folder / "__pycache__" / "module.pyc" 95 | normal_file = docs_folder / "readme.md" 96 | 97 | assert should_ignore_path(git_file, docs_folder) 98 | assert should_ignore_path(venv_file, docs_folder) 99 | assert should_ignore_path(pycache_file, docs_folder) 100 | assert not should_ignore_path(normal_file, docs_folder) 101 | -------------------------------------------------------------------------------- /scripts/start-all-services.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check if zellij is installed 4 | if ! command -v zellij &> /dev/null; then 5 | echo "📺 Zellij not found. Installing..." 
6 | uvx dotbins get zellij-org/zellij 7 | export PATH="$HOME/.local/bin:$PATH" 8 | fi 9 | 10 | # Get the current directory 11 | SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd)" 12 | 13 | 14 | # Create .runtime directory and Zellij layout file 15 | mkdir -p "$SCRIPTS_DIR/.runtime" 16 | cat > "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" << EOF 17 | session_name "agent-cli" 18 | 19 | layout { 20 | pane split_direction="vertical" { 21 | pane split_direction="horizontal" { 22 | pane { 23 | name "Ollama" 24 | command "ollama" 25 | args "serve" 26 | } 27 | pane { 28 | name "Help" 29 | command "sh" 30 | args "-c" "cat $SCRIPTS_DIR/zellij_help.txt | less" 31 | } 32 | } 33 | pane split_direction="horizontal" { 34 | pane { 35 | name "Whisper" 36 | cwd "$SCRIPTS_DIR" 37 | command "./run-whisper.sh" 38 | } 39 | pane split_direction="horizontal" { 40 | pane { 41 | name "Piper" 42 | cwd "$SCRIPTS_DIR" 43 | command "./run-piper.sh" 44 | } 45 | pane { 46 | name "OpenWakeWord" 47 | cwd "$SCRIPTS_DIR" 48 | command "./run-openwakeword.sh" 49 | } 50 | } 51 | } 52 | } 53 | 54 | floating_panes { 55 | pane { 56 | name "Help" 57 | x "10%" 58 | y "10%" 59 | width "80%" 60 | height "80%" 61 | command "sh" 62 | close_on_exit true 63 | args "-c" "cat $SCRIPTS_DIR/zellij_help.txt | less" 64 | } 65 | } 66 | } 67 | EOF 68 | 69 | # Function to show common usage instructions 70 | show_usage() { 71 | echo "❌ Use 'Ctrl-Q' to quit Zellij" 72 | echo "🔌 Use 'Ctrl-O d' to detach from the session" 73 | echo "🔗 Use 'zellij attach agent-cli' to reattach" 74 | } 75 | 76 | # Function to start a new Zellij session 77 | start_new_session() { 78 | if [ "$AGENT_CLI_NO_ATTACH" = "true" ]; then 79 | # Start detached 80 | zellij --session agent-cli --layout "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" & 81 | sleep 1 # Give it a moment to start 82 | echo "✅ Session 'agent-cli' started in background. Use 'zellij attach agent-cli' to view." 
83 | else 84 | show_usage 85 | # Start zellij with layout file - session name is specified in the layout 86 | zellij --layout "$SCRIPTS_DIR/.runtime/agent-cli-layout.kdl" 87 | fi 88 | } 89 | 90 | # Check if agent-cli session already exists and is running 91 | # Case 1: Session exists but has exited - clean it up and start fresh 92 | if zellij list-sessions 2>/dev/null | grep "agent-cli" | grep -q "EXITED"; then 93 | echo "🧹 Found exited session 'agent-cli'. Cleaning up..." 94 | zellij delete-session agent-cli 95 | echo "🆕 Starting fresh services in Zellij..." 96 | start_new_session 97 | # Case 2: Session exists and is running - attach to it if requested 98 | elif zellij list-sessions 2>/dev/null | grep -q "agent-cli"; then 99 | if [ "$AGENT_CLI_NO_ATTACH" = "true" ]; then 100 | echo "✅ Session 'agent-cli' is already running. Not attaching as requested." 101 | else 102 | echo "🔗 Session 'agent-cli' already exists and is running. Attaching..." 103 | show_usage 104 | zellij attach agent-cli 105 | fi 106 | # Case 3: No session exists - create a new one 107 | else 108 | echo "🚀 Starting all services in Zellij..." 109 | start_new_session 110 | fi 111 | -------------------------------------------------------------------------------- /agent_cli/core/audio_format.py: -------------------------------------------------------------------------------- 1 | """Audio format conversion utilities using FFmpeg.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import shutil 7 | import subprocess 8 | import tempfile 9 | from pathlib import Path 10 | 11 | from agent_cli import constants 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | VALID_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac", ".webm") 16 | 17 | 18 | def convert_audio_to_wyoming_format( 19 | audio_data: bytes, 20 | source_filename: str, 21 | ) -> bytes: 22 | """Convert audio data to Wyoming-compatible format using FFmpeg. 
23 | 24 | Args: 25 | audio_data: Raw audio data 26 | source_filename: Source filename to help FFmpeg detect format 27 | 28 | Returns: 29 | Converted audio data as raw PCM bytes (16kHz, 16-bit, mono) 30 | 31 | Raises: 32 | RuntimeError: If FFmpeg is not available or conversion fails 33 | 34 | """ 35 | # Check if FFmpeg is available 36 | if not shutil.which("ffmpeg"): 37 | msg = "FFmpeg not found in PATH. Please install FFmpeg to convert audio formats." 38 | raise RuntimeError(msg) 39 | 40 | # Create temporary files for input and output 41 | suffix = _get_file_extension(source_filename) 42 | with ( 43 | tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as input_file, 44 | tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as output_file, 45 | ): 46 | input_path = Path(input_file.name) 47 | output_path = Path(output_file.name) 48 | 49 | try: 50 | # Write input audio data 51 | input_file.write(audio_data) 52 | input_file.flush() 53 | 54 | # Build FFmpeg command to convert to Wyoming format 55 | # -f s16le: 16-bit signed little-endian PCM 56 | # -ar 16000: 16kHz sample rate 57 | # -ac 1: mono (1 channel) 58 | cmd = [ 59 | "ffmpeg", 60 | "-y", 61 | "-i", 62 | str(input_path), 63 | "-f", 64 | "s16le", 65 | "-ar", 66 | str(constants.AUDIO_RATE), 67 | "-ac", 68 | str(constants.AUDIO_CHANNELS), 69 | str(output_path), 70 | ] 71 | 72 | logger.debug("Running FFmpeg command: %s", " ".join(cmd)) 73 | 74 | # Run FFmpeg 75 | result = subprocess.run( 76 | cmd, 77 | capture_output=True, 78 | text=False, 79 | check=False, 80 | ) 81 | 82 | if result.returncode != 0: 83 | stderr_text = result.stderr.decode("utf-8", errors="replace") 84 | logger.error("FFmpeg failed with return code %d", result.returncode) 85 | logger.error("FFmpeg stderr: %s", stderr_text) 86 | msg = f"FFmpeg conversion failed: {stderr_text}" 87 | raise RuntimeError(msg) 88 | 89 | # Read converted audio data 90 | return output_path.read_bytes() 91 | 92 | finally: 93 | # Clean up temporary files 94 | 
input_path.unlink(missing_ok=True) 95 | output_path.unlink(missing_ok=True) 96 | 97 | 98 | def _get_file_extension(filename: str) -> str: 99 | """Get file extension from filename, defaulting to .tmp. 100 | 101 | Args: 102 | filename: Source filename 103 | 104 | Returns: 105 | File extension including the dot 106 | 107 | """ 108 | filename = str(filename).lower() 109 | 110 | for ext in VALID_EXTENSIONS: 111 | if filename.endswith(ext): 112 | return ext 113 | 114 | return ".tmp" 115 | 116 | 117 | def check_ffmpeg_available() -> bool: 118 | """Check if FFmpeg is available in the system PATH. 119 | 120 | Returns: 121 | True if FFmpeg is available, False otherwise 122 | 123 | """ 124 | return shutil.which("ffmpeg") is not None 125 | -------------------------------------------------------------------------------- /scripts/setup-linux.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | echo "🚀 Setting up agent-cli services on Linux..." 6 | 7 | # Function to install uv based on the distribution 8 | install_uv() { 9 | if command -v curl &> /dev/null; then 10 | echo "📦 Installing uv using curl..." 11 | curl -LsSf https://astral.sh/uv/install.sh | sh 12 | # Add to PATH for current session 13 | export PATH="$HOME/.local/bin:$PATH" 14 | else 15 | echo "curl not found. Please install curl first:" 16 | echo " Ubuntu/Debian: sudo apt install curl" 17 | echo " Fedora/RHEL: sudo dnf install curl" 18 | exit 1 19 | fi 20 | } 21 | 22 | # Check if uv is installed 23 | if ! command -v uv &> /dev/null; then 24 | echo "📦 Installing uv..." 25 | install_uv 26 | fi 27 | 28 | # Check for PortAudio (required for audio processing) 29 | echo "🔊 Checking PortAudio..." 30 | if ! pkg-config --exists portaudio-2.0 2>/dev/null; then 31 | echo "❌ ERROR: PortAudio development libraries are not installed." 32 | echo "" 33 | echo "PyAudio requires PortAudio. 
Install using your distribution's package manager:" 34 | echo "" 35 | echo "Ubuntu/Debian:" 36 | echo " sudo apt install portaudio19-dev" 37 | echo "" 38 | echo "Fedora/RHEL/CentOS:" 39 | echo " sudo dnf install portaudio-devel" 40 | echo "" 41 | echo "Arch Linux:" 42 | echo " sudo pacman -S portaudio" 43 | echo "" 44 | echo "openSUSE:" 45 | echo " sudo zypper install portaudio-devel" 46 | echo "" 47 | echo "After installing PortAudio, run this script again." 48 | exit 1 49 | else 50 | echo "✅ PortAudio is already installed" 51 | fi 52 | 53 | # Install Ollama 54 | echo "🧠 Checking Ollama..." 55 | if ! command -v ollama &> /dev/null; then 56 | echo "📦 Installing Ollama..." 57 | curl -fsSL https://ollama.ai/install.sh | sh 58 | echo "✅ Ollama installed successfully" 59 | else 60 | echo "✅ Ollama is already installed" 61 | fi 62 | 63 | # Check if zellij is available or offer alternatives 64 | if ! command -v zellij &> /dev/null; then 65 | echo "📺 Zellij not found. Installing..." 66 | 67 | # Try different installation methods based on what's available 68 | if command -v cargo &> /dev/null; then 69 | echo "🦀 Installing zellij via cargo..." 70 | cargo install zellij 71 | elif command -v flatpak &> /dev/null; then 72 | echo "📦 Installing zellij via flatpak..." 73 | flatpak install -y flathub org.zellij_developers.zellij 74 | else 75 | echo "📥 Installing zellij binary..." 76 | curl -L https://github.com/zellij-org/zellij/releases/latest/download/zellij-x86_64-unknown-linux-musl.tar.gz | tar -xz -C ~/.local/bin/ 77 | chmod +x ~/.local/bin/zellij 78 | export PATH="$HOME/.local/bin:$PATH" 79 | fi 80 | fi 81 | 82 | # Install agent-cli 83 | echo "🤖 Installing/upgrading agent-cli..." 84 | uv tool install --upgrade agent-cli 85 | 86 | # Preload default Ollama model 87 | echo "⬇️ Preloading default Ollama model (gemma3:4b)..." 88 | echo "⏳ This may take a few minutes depending on your internet connection..." 
89 | # Start Ollama in background, then pull model synchronously 90 | (ollama serve >/dev/null 2>&1 &) && sleep 2 && ollama pull gemma3:4b 91 | # Stop the temporary ollama server 92 | pkill -f "ollama serve" || true 93 | 94 | echo "" 95 | echo "✅ Setup complete! You can now run the services:" 96 | echo "" 97 | echo "Option 1 - Run all services at once:" 98 | echo " scripts/start-all-services.sh" 99 | echo "" 100 | echo "Option 2 - Run services individually:" 101 | echo " 1. Ollama: ollama serve" 102 | echo " 2. Whisper: scripts/run-whisper.sh" 103 | echo " 3. Piper: scripts/run-piper.sh" 104 | echo " 4. OpenWakeWord: scripts/run-openwakeword.sh" 105 | echo "" 106 | echo "📝 Note: Services use uvx to run without needing virtual environments." 107 | echo "For GPU acceleration, make sure NVIDIA drivers and CUDA are installed." 108 | echo "🎉 agent-cli has been installed and is ready to use!" 109 | -------------------------------------------------------------------------------- /tests/memory/test_proxy_passthrough.py: -------------------------------------------------------------------------------- 1 | """Tests for the memory proxy passthrough functionality.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | from unittest.mock import AsyncMock, Mock 7 | 8 | import pytest 9 | from fastapi.testclient import TestClient 10 | 11 | from agent_cli.memory.api import create_app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | from pytest_mock import MockerFixture 17 | 18 | 19 | @pytest.fixture 20 | def mock_memory_client(mocker: MockerFixture) -> Mock: 21 | """Mock the MemoryClient to avoid side effects.""" 22 | mock_client_cls = mocker.patch("agent_cli.memory.api.MemoryClient") 23 | mock_client = mock_client_cls.return_value 24 | mock_client.memory_path = "dummy_path" 25 | mock_client.openai_base_url = "http://upstream.test/v1" 26 | mock_client.chat_api_key = "dummy-key" 27 | mock_client.default_top_k = 5 28 | return 
mock_client 29 | 30 | 31 | @pytest.fixture 32 | def app(tmp_path: Path, mock_memory_client: Mock) -> TestClient: # noqa: ARG001 33 | """Create the FastAPI app with mocked client.""" 34 | fastapi_app = create_app( 35 | memory_path=tmp_path, 36 | openai_base_url="http://upstream.test/v1", 37 | ) 38 | return TestClient(fastapi_app) 39 | 40 | 41 | def test_proxy_passthrough_models(app: TestClient, mocker: MockerFixture) -> None: 42 | """Test that /v1/models is forwarded to the upstream.""" 43 | # Mock httpx.AsyncClient.send 44 | mock_send = AsyncMock() 45 | mock_response = Mock() 46 | mock_response.status_code = 200 47 | mock_response.content = b'{"data": [{"id": "gpt-4"}]}' 48 | mock_response.headers = {"Content-Type": "application/json"} 49 | mock_send.return_value = mock_response 50 | 51 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 52 | 53 | response = app.get("/v1/models") 54 | 55 | assert response.status_code == 200 56 | assert response.json() == {"data": [{"id": "gpt-4"}]} 57 | 58 | # Verify the request was constructed correctly 59 | assert mock_send.call_count == 1 60 | call_args = mock_send.call_args 61 | request_obj = call_args[0][0] 62 | 63 | # Check that URL was constructed correctly (v1 should not be duplicated if base has it) 64 | # base="http://upstream.test/v1", path="v1/models" -> "http://upstream.test/v1/models" 65 | assert str(request_obj.url) == "http://upstream.test/v1/models" 66 | assert request_obj.method == "GET" 67 | 68 | 69 | def test_proxy_passthrough_catchall_other_path(app: TestClient, mocker: MockerFixture) -> None: 70 | """Test that an arbitrary path is forwarded.""" 71 | mock_send = AsyncMock() 72 | mock_response = Mock() 73 | mock_response.status_code = 200 74 | mock_response.content = b"OK" 75 | mock_response.headers = {"Content-Type": "text/plain"} 76 | mock_send.return_value = mock_response 77 | 78 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 79 | 80 | response = app.post("/other/path?foo=bar", 
content=b"payload") 81 | 82 | assert response.status_code == 200 83 | assert response.content == b"OK" 84 | 85 | # Verify construction 86 | assert mock_send.call_count == 1 87 | request_obj = mock_send.call_args[0][0] 88 | assert str(request_obj.url) == "http://upstream.test/v1/other/path?foo=bar" 89 | assert request_obj.method == "POST" 90 | # Note: TestClient sends body, but httpx.build_request might consume it differently 91 | # depending on how we mock. We just verify the call happened. 92 | 93 | 94 | def test_proxy_passthrough_upstream_error(app: TestClient, mocker: MockerFixture) -> None: 95 | """Test handling of upstream errors.""" 96 | mock_send = AsyncMock() 97 | mock_send.side_effect = Exception("Connection refused") 98 | 99 | mocker.patch("httpx.AsyncClient.send", side_effect=mock_send) 100 | 101 | response = app.get("/v1/models") 102 | 103 | assert response.status_code == 502 104 | assert response.content == b"Upstream Proxy Error" 105 | -------------------------------------------------------------------------------- /agent_cli/memory/api.py: -------------------------------------------------------------------------------- 1 | """FastAPI application factory for memory proxy.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from fastapi import FastAPI, Request 9 | from fastapi.middleware.cors import CORSMiddleware 10 | 11 | from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL 12 | from agent_cli.core.openai_proxy import proxy_request_to_upstream 13 | from agent_cli.memory.client import MemoryClient 14 | from agent_cli.memory.models import ChatRequest # noqa: TC001 15 | 16 | if TYPE_CHECKING: 17 | from pathlib import Path 18 | 19 | LOGGER = logging.getLogger(__name__) 20 | 21 | 22 | def create_app( 23 | memory_path: Path, 24 | openai_base_url: str, 25 | embedding_model: str = DEFAULT_OPENAI_EMBEDDING_MODEL, 26 | embedding_api_key: str | None = None, 27 | chat_api_key: str | 
None = None, 28 | default_top_k: int = 5, 29 | enable_summarization: bool = True, 30 | max_entries: int = 500, 31 | mmr_lambda: float = 0.7, 32 | recency_weight: float = 0.2, 33 | score_threshold: float = 0.35, 34 | enable_git_versioning: bool = True, 35 | ) -> FastAPI: 36 | """Create the FastAPI app for memory-backed chat.""" 37 | LOGGER.info("Initializing memory client...") 38 | 39 | client = MemoryClient( 40 | memory_path=memory_path, 41 | openai_base_url=openai_base_url, 42 | embedding_model=embedding_model, 43 | embedding_api_key=embedding_api_key, 44 | chat_api_key=chat_api_key, 45 | default_top_k=default_top_k, 46 | enable_summarization=enable_summarization, 47 | max_entries=max_entries, 48 | mmr_lambda=mmr_lambda, 49 | recency_weight=recency_weight, 50 | score_threshold=score_threshold, 51 | start_watcher=False, # We control start/stop via app events 52 | enable_git_versioning=enable_git_versioning, 53 | ) 54 | 55 | app = FastAPI(title="Memory Proxy") 56 | 57 | app.add_middleware( 58 | CORSMiddleware, 59 | allow_origins=["*"], 60 | allow_credentials=True, 61 | allow_methods=["*"], 62 | allow_headers=["*"], 63 | ) 64 | 65 | @app.post("/v1/chat/completions") 66 | async def chat_completions(request: Request, chat_request: ChatRequest) -> Any: 67 | auth_header = request.headers.get("Authorization") 68 | api_key = None 69 | if auth_header and auth_header.startswith("Bearer "): 70 | api_key = auth_header.split(" ")[1] 71 | 72 | return await client.chat( 73 | messages=chat_request.messages, 74 | conversation_id=chat_request.memory_id or "default", 75 | model=chat_request.model, 76 | stream=chat_request.stream or False, 77 | api_key=api_key, 78 | memory_top_k=chat_request.memory_top_k, 79 | recency_weight=chat_request.memory_recency_weight, 80 | score_threshold=chat_request.memory_score_threshold, 81 | ) 82 | 83 | @app.on_event("startup") 84 | async def start_watch() -> None: 85 | client.start() 86 | 87 | @app.on_event("shutdown") 88 | async def stop_watch() -> 
None: 89 | await client.stop() 90 | 91 | @app.get("/health") 92 | def health() -> dict[str, str]: 93 | return { 94 | "status": "ok", 95 | "memory_store": str(client.memory_path), 96 | "openai_base_url": client.openai_base_url, 97 | "default_top_k": str(client.default_top_k), 98 | } 99 | 100 | @app.api_route( 101 | "/{path:path}", 102 | methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"], 103 | ) 104 | async def proxy_catch_all(request: Request, path: str) -> Any: 105 | """Forward any other request to the upstream provider.""" 106 | return await proxy_request_to_upstream( 107 | request, 108 | path, 109 | client.openai_base_url, 110 | client.chat_api_key, 111 | ) 112 | 113 | return app 114 | -------------------------------------------------------------------------------- /agent_cli/core/reranker.py: -------------------------------------------------------------------------------- 1 | """Shared ONNX Cross-Encoder for reranking (used by both RAG and Memory).""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | 7 | from huggingface_hub import hf_hub_download 8 | from onnxruntime import InferenceSession 9 | from transformers import AutoTokenizer 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | 14 | def _download_onnx_model(model_name: str, onnx_filename: str) -> str: 15 | """Download the ONNX model, favoring the common `onnx/` folder layout.""" 16 | if "/" in onnx_filename: 17 | return hf_hub_download(repo_id=model_name, filename=onnx_filename) 18 | 19 | try: 20 | return hf_hub_download(repo_id=model_name, filename=onnx_filename, subfolder="onnx") 21 | except Exception as first_error: 22 | LOGGER.debug( 23 | "ONNX file not found under onnx/ for %s: %s. 
Falling back to repo root.", 24 | model_name, 25 | first_error, 26 | ) 27 | try: 28 | return hf_hub_download(repo_id=model_name, filename=onnx_filename) 29 | except Exception as second_error: 30 | LOGGER.exception( 31 | "Failed to download ONNX model %s (filename=%s)", 32 | model_name, 33 | onnx_filename, 34 | exc_info=second_error, 35 | ) 36 | raise 37 | 38 | 39 | class OnnxCrossEncoder: 40 | """A lightweight CrossEncoder using ONNX Runtime.""" 41 | 42 | def __init__( 43 | self, 44 | model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2", 45 | onnx_filename: str = "model.onnx", 46 | ) -> None: 47 | """Initialize the ONNX CrossEncoder.""" 48 | self.model_name = model_name 49 | 50 | # Download model if needed 51 | LOGGER.info("Loading ONNX model: %s", model_name) 52 | model_path = _download_onnx_model(model_name, onnx_filename) 53 | 54 | self.session = InferenceSession(model_path) 55 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 56 | 57 | def predict( 58 | self, 59 | pairs: list[tuple[str, str]], 60 | batch_size: int = 32, 61 | ) -> list[float]: 62 | """Predict relevance scores for query-document pairs.""" 63 | import numpy as np # noqa: PLC0415 64 | 65 | if not pairs: 66 | return [] 67 | 68 | all_scores = [] 69 | 70 | # Process in batches 71 | for i in range(0, len(pairs), batch_size): 72 | batch = pairs[i : i + batch_size] 73 | queries = [q for q, d in batch] 74 | docs = [d for q, d in batch] 75 | 76 | # Tokenize 77 | inputs = self.tokenizer( 78 | queries, 79 | docs, 80 | padding=True, 81 | truncation=True, 82 | return_tensors="np", 83 | max_length=512, 84 | ) 85 | 86 | # ONNX Input 87 | # Check what inputs the model expects. 
usually input_ids, attention_mask, token_type_ids 88 | # specific models might not need token_type_ids 89 | ort_inputs = { 90 | "input_ids": inputs["input_ids"].astype(np.int64), 91 | "attention_mask": inputs["attention_mask"].astype(np.int64), 92 | } 93 | if "token_type_ids" in inputs: 94 | ort_inputs["token_type_ids"] = inputs["token_type_ids"].astype(np.int64) 95 | 96 | # Run inference 97 | logits = self.session.run(None, ort_inputs)[0] 98 | 99 | # Extract scores (usually shape [batch, 1] or [batch]) 100 | batch_scores = logits.flatten() if logits.ndim > 1 else logits 101 | 102 | all_scores.extend(batch_scores.tolist()) 103 | 104 | return all_scores 105 | 106 | 107 | def get_reranker_model( 108 | model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2", 109 | ) -> OnnxCrossEncoder: 110 | """Load the CrossEncoder model.""" 111 | return OnnxCrossEncoder(model_name) 112 | 113 | 114 | def predict_relevance( 115 | model: OnnxCrossEncoder, 116 | pairs: list[tuple[str, str]], 117 | ) -> list[float]: 118 | """Predict relevance scores for query-document pairs.""" 119 | return model.predict(pairs) 120 | -------------------------------------------------------------------------------- /docs/installation/windows.md: -------------------------------------------------------------------------------- 1 | # Windows Installation Guide 2 | 3 | While `agent-cli` does not have an automated setup script for native Windows, you can achieve a seamless experience by using a **Split Setup**. 4 | 5 | This approach uses **WSL 2 (Windows Subsystem for Linux)** to run the heavy AI services (the "Brain") while running the lightweight `agent-cli` tool natively on Windows (the "Ears") to access your microphone and clipboard. 6 | 7 | ## Prerequisites 8 | 9 | 1. **WSL 2**: Ensure you have WSL 2 installed (typically Ubuntu). 10 | * [How to install WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 11 | 2. **Git**: Installed in both WSL and Windows. 12 | 3. 
**uv**: The Python package manager (installed on Windows). 13 | 14 | --- 15 | 16 | ## Part 1: The "Brain" (WSL Side) 17 | 18 | We will run the backend services (Ollama, Whisper, Piper, etc.) inside WSL. 19 | 20 | 1. **Open your WSL terminal** (e.g., Ubuntu). 21 | 2. **Clone the repository and run the Linux setup:** 22 | 23 | ```bash 24 | git clone https://github.com/basnijholt/agent-cli.git 25 | cd agent-cli 26 | ./scripts/setup-linux.sh 27 | ``` 28 | 29 | 3. **Start the services:** 30 | 31 | ```bash 32 | ./scripts/start-all-services.sh 33 | ``` 34 | 35 | This will launch a Zellij session with all services running. By default, WSL forwards these ports (11434, 10300, 10200, 10400) to your Windows `localhost`. 36 | 37 | --- 38 | 39 | ## Part 2: The "Ears" (Windows Side) 40 | 41 | Now we install the client on Windows so it can access your hardware (microphone) and interact with your desktop (clipboard). 42 | 43 | ### 1. Install uv 44 | If you haven't installed `uv` yet, run this in PowerShell: 45 | ```powershell 46 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 47 | ``` 48 | `uv` will automatically manage the required Python version for the tool. 49 | 50 | ### 2. Install agent-cli 51 | Run the following command to install the tool: 52 | 53 | ```powershell 54 | uv tool install agent-cli 55 | ``` 56 | 57 | > [!NOTE] 58 | > `agent-cli` uses **sounddevice** for audio, which automatically includes the necessary PortAudio binaries for Windows. You typically do not need to install any external drivers or libraries manually. 59 | 60 | ### 3. 
Test the Connection 61 | Run a command in PowerShell to verify that Windows can talk to the WSL services: 62 | 63 | ```powershell 64 | # This records audio on Windows -> sends to WSL -> copies text to Windows clipboard 65 | agent-cli transcribe 66 | ``` 67 | 68 | --- 69 | 70 | ## Part 3: Automation (AutoHotkey) 71 | 72 | To invoke these commands globally (like the macOS/Linux hotkeys), use [AutoHotkey v2](https://www.autohotkey.com/). 73 | 74 | 1. Create a file named `agent-cli.ahk`. 75 | 2. Paste the following script: 76 | 77 | ```autohotkey 78 | #Requires AutoHotkey v2.0 79 | Persistent ; Keep script running with tray icon 80 | 81 | ; Win+Shift+W to toggle transcription (W for Whisper) 82 | #+w::{ 83 | statusFile := A_Temp . "\agent-cli-status.txt" 84 | cmd := Format('{1} /C agent-cli transcribe --status > "{2}" 2>&1', A_ComSpec, statusFile) 85 | RunWait(cmd, , "Hide") 86 | status := FileRead(statusFile) 87 | if InStr(status, "not running") { 88 | TrayTip("🎤 Starting transcription...", "agent-cli", 1) 89 | Run("agent-cli transcribe --toggle --input-device-index 1", , "Hide") ; adjust device index if needed 90 | } else { 91 | TrayTip("🛑 Stopping transcription...", "agent-cli", 1) 92 | Run("agent-cli transcribe --toggle", , "Hide") 93 | } 94 | } 95 | 96 | ; Win+Shift+A to autocorrect clipboard 97 | #+a::{ 98 | TrayTip("✍️ Autocorrecting clipboard...", "agent-cli", 1) 99 | Run("agent-cli autocorrect", , "Hide") 100 | } 101 | 102 | ; Win+Shift+E to voice edit selection 103 | #+e::{ 104 | Send("^c") 105 | ClipWait(1) 106 | TrayTip("🗣️ Voice editing selection...", "agent-cli", 1) 107 | Run("agent-cli voice-edit --input-device-index 1", , "Hide") ; adjust device index if needed 108 | } 109 | ``` 110 | 111 | 3. Double-click the script to run it. 112 | 113 | > [!TIP] 114 | > Using `--toggle` stops an existing background recorder if it's already running, so you can press the same hotkey to start/stop the session without leaving a stray process behind. 
115 | 116 | **Note on Audio Devices:** 117 | If `agent-cli` doesn't pick up your microphone, run `agent-cli transcribe --list-devices` to find the correct `--input-device-index`. 118 | -------------------------------------------------------------------------------- /tests/test_asr.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the asr module.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import AsyncMock, MagicMock, patch 6 | 7 | import pytest 8 | from wyoming.asr import Transcribe, Transcript, TranscriptChunk 9 | from wyoming.audio import AudioChunk, AudioStart, AudioStop 10 | 11 | from agent_cli.services import asr 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_send_audio() -> None: 16 | """Test that _send_audio sends the correct events.""" 17 | # Arrange 18 | client = AsyncMock() 19 | stream = MagicMock() 20 | stop_event = MagicMock() 21 | stop_event.is_set.side_effect = [False, True] # Allow one iteration then stop 22 | stop_event.ctrl_c_pressed = False 23 | 24 | mock_data = MagicMock() 25 | mock_data.tobytes.return_value = b"fake_audio_chunk" 26 | stream.read.return_value = (mock_data, False) 27 | logger = MagicMock() 28 | 29 | # Act 30 | # No need to create a task and sleep, just await the coroutine. 31 | # The side_effect will stop the loop. 
32 | await asr._send_audio( 33 | client, 34 | stream, 35 | stop_event, 36 | logger, 37 | live=MagicMock(), 38 | quiet=False, 39 | save_recording=False, 40 | ) 41 | 42 | # Assert 43 | assert client.write_event.call_count == 4 44 | client.write_event.assert_any_call(Transcribe().event()) 45 | client.write_event.assert_any_call( 46 | AudioStart(rate=16000, width=2, channels=1).event(), 47 | ) 48 | client.write_event.assert_any_call( 49 | AudioChunk( 50 | rate=16000, 51 | width=2, 52 | channels=1, 53 | audio=b"fake_audio_chunk", 54 | ).event(), 55 | ) 56 | client.write_event.assert_any_call(AudioStop().event()) 57 | 58 | 59 | @pytest.mark.asyncio 60 | async def test_receive_text() -> None: 61 | """Test that receive_transcript correctly processes events.""" 62 | # Arrange 63 | client = AsyncMock() 64 | client.read_event.side_effect = [ 65 | TranscriptChunk(text="hello").event(), 66 | Transcript(text="hello world").event(), 67 | None, # To stop the loop 68 | ] 69 | logger = MagicMock() 70 | chunk_callback = MagicMock() 71 | final_callback = MagicMock() 72 | 73 | # Act 74 | result = await asr._receive_transcript( 75 | client, 76 | logger, 77 | chunk_callback=chunk_callback, 78 | final_callback=final_callback, 79 | ) 80 | 81 | # Assert 82 | assert result == "hello world" 83 | chunk_callback.assert_called_once_with("hello") 84 | final_callback.assert_called_once_with("hello world") 85 | 86 | 87 | def test_create_transcriber(): 88 | """Test that the correct transcriber is returned.""" 89 | provider_cfg = MagicMock() 90 | provider_cfg.asr_provider = "openai" 91 | transcriber = asr.create_transcriber( 92 | provider_cfg, 93 | MagicMock(), 94 | MagicMock(), 95 | MagicMock(), 96 | ) 97 | assert transcriber.func is asr._transcribe_live_audio_openai 98 | 99 | provider_cfg.asr_provider = "wyoming" 100 | transcriber = asr.create_transcriber( 101 | provider_cfg, 102 | MagicMock(), 103 | MagicMock(), 104 | MagicMock(), 105 | ) 106 | assert transcriber.func is 
asr._transcribe_live_audio_wyoming 107 | 108 | 109 | def test_create_recorded_audio_transcriber(): 110 | """Test that the correct recorded audio transcriber is returned.""" 111 | provider_cfg = MagicMock() 112 | provider_cfg.asr_provider = "openai" 113 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 114 | assert transcriber is asr.transcribe_audio_openai 115 | 116 | provider_cfg.asr_provider = "wyoming" 117 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 118 | assert transcriber is asr._transcribe_recorded_audio_wyoming 119 | 120 | 121 | @pytest.mark.asyncio 122 | @patch("agent_cli.services.asr.wyoming_client_context", side_effect=ConnectionRefusedError) 123 | async def test_transcribe_recorded_audio_wyoming_connection_error( 124 | mock_wyoming_client_context: MagicMock, 125 | ): 126 | """Test that transcribe_recorded_audio_wyoming handles ConnectionRefusedError.""" 127 | result = await asr._transcribe_recorded_audio_wyoming( 128 | audio_data=b"test", 129 | wyoming_asr_cfg=MagicMock(), 130 | logger=MagicMock(), 131 | ) 132 | assert result == "" 133 | mock_wyoming_client_context.assert_called_once() 134 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel", "versioningit"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "agent-cli" 7 | description = "A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance." 
8 | dynamic = ["version"] 9 | authors = [{ name = "Bas Nijholt", email = "bas@nijho.lt" }] 10 | dependencies = [ 11 | "wyoming", 12 | "sounddevice", 13 | "numpy", 14 | "rich", 15 | "pyperclip", 16 | "pydantic-ai-slim[openai,duckduckgo,vertexai]", 17 | "typer", 18 | "openai", 19 | "dotenv", 20 | "google-genai>=1.25.0", 21 | "httpx", 22 | "psutil; sys_platform == 'win32'", 23 | ] 24 | requires-python = ">=3.11" 25 | 26 | [project.readme] 27 | file = "README.md" 28 | content-type = "text/markdown" 29 | 30 | [project.urls] 31 | Homepage = "https://github.com/basnijholt/agent-cli" 32 | 33 | [project.optional-dependencies] 34 | server = [ 35 | "fastapi[standard]", 36 | ] 37 | rag = [ 38 | "fastapi[standard]", 39 | "chromadb>=0.4.22", 40 | "onnxruntime>=1.17.0", 41 | "huggingface-hub>=0.20.0", 42 | "transformers>=4.30.0", 43 | "watchfiles>=0.21.0", 44 | # Until here same as 'memory' extras 45 | "markitdown[docx,pdf,pptx]>=0.1.3", 46 | ] 47 | memory = [ 48 | "fastapi[standard]", 49 | "chromadb>=0.4.22", 50 | "onnxruntime>=1.17.0", 51 | "huggingface-hub>=0.20.0", 52 | "transformers>=4.30.0", 53 | "watchfiles>=0.21.0", 54 | # Until here same as 'rag' extras 55 | "pyyaml>=6.0.0", 56 | ] 57 | test = [ 58 | "pytest>=7.0.0", 59 | "pytest-asyncio>=0.20.0", 60 | "pytest-cov>=4.0.0", 61 | "pydantic-ai-slim[openai]", 62 | "pytest-timeout", 63 | "pytest-mock", 64 | ] 65 | dev = [ 66 | "agent-cli[test]", 67 | "pre-commit>=3.0.0", 68 | "versioningit", 69 | "markdown-code-runner", 70 | "ruff", 71 | "notebook", 72 | ] 73 | speed = ["audiostretchy>=1.3.0"] 74 | 75 | # Duplicate of test+dev optional-dependencies groups 76 | [dependency-groups] 77 | dev = [ 78 | "pytest>=7.0.0", 79 | "pytest-asyncio>=0.20.0", 80 | "pytest-cov>=4.0.0", 81 | "pydantic-ai-slim[openai]", 82 | "pytest-timeout", 83 | "pytest-mock", 84 | "pre-commit>=3.0.0", 85 | "versioningit", 86 | "markdown-code-runner", 87 | "ruff", 88 | "notebook", 89 | "audiostretchy>=1.3.0", 90 | "pre-commit-uv>=4.1.4", 91 | ] 92 | 93 | 
[project.scripts] 94 | agent-cli = "agent_cli.cli:app" 95 | 96 | [tool.setuptools.packages.find] 97 | include = ["agent_cli.*", "agent_cli"] 98 | 99 | [tool.setuptools.package-data] 100 | "agent_cli" = ["py.typed", "scripts/**/*", "example-config.toml"] 101 | 102 | [tool.pytest.ini_options] 103 | asyncio_default_fixture_loop_scope = "function" 104 | filterwarnings = [ 105 | "ignore::DeprecationWarning", 106 | "ignore::PendingDeprecationWarning", 107 | ] 108 | timeout = 10 109 | norecursedirs = ["rag_db", "memory_db", "build", "dist"] 110 | addopts = [ 111 | "--cov=agent_cli", 112 | "--cov-report=term", 113 | "--cov-report=html", 114 | "--cov-report=xml", 115 | "--no-cov-on-fail", 116 | "-v", 117 | ] 118 | 119 | [tool.coverage.run] 120 | omit = [] 121 | 122 | [tool.coverage.report] 123 | exclude_lines = [ 124 | "pragma: no cover", 125 | "raise NotImplementedError", 126 | "if TYPE_CHECKING:", 127 | 'if __name__ == "__main__":', 128 | ] 129 | 130 | [tool.black] 131 | line-length = 100 132 | 133 | [tool.ruff] 134 | line-length = 100 135 | target-version = "py311" 136 | 137 | [tool.ruff.lint] 138 | select = ["ALL"] 139 | ignore = [ 140 | "T20", 141 | "S101", 142 | "S603", 143 | "ANN401", 144 | "D402", 145 | "PLW0603", 146 | "D401", 147 | "SLF001", 148 | "PLR0913", 149 | "TD002", 150 | "ANN002", 151 | "ANN003", 152 | "E501", 153 | "TRY300", 154 | "FBT001", # Boolean-typed positional argument in function definition 155 | "FBT002", # Boolean-typed keyword-only argument in function definition 156 | "BLE001", # Do not catch blind exception: `Exception` 157 | ] 158 | 159 | [tool.ruff.lint.per-file-ignores] 160 | "tests/*" = ["SLF001", "D103", "E501", "PLR2004", "S101", "ANN201", "FBT003"] 161 | "tests/test_examples.py" = ["E501"] 162 | ".github/*" = ["INP001"] 163 | "example/*" = ["INP001", "D100"] 164 | "docs/*" = ["INP001", "E501"] 165 | 166 | [tool.ruff.lint.mccabe] 167 | max-complexity = 18 168 | 169 | [tool.mypy] 170 | python_version = "3.11" 171 | 172 | 
[tool.versioningit] 173 | default-version = "0.0.0" 174 | 175 | [tool.versioningit.vcs] 176 | method = "git" 177 | match = ["v*"] 178 | default-tag = "0.0.0" 179 | -------------------------------------------------------------------------------- /agent_cli/agents/_voice_agent_common.py: -------------------------------------------------------------------------------- 1 | r"""Common functionalities for voice-based agents.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import time 7 | from typing import TYPE_CHECKING 8 | 9 | import pyperclip 10 | 11 | from agent_cli.core.utils import print_input_panel, print_with_style 12 | from agent_cli.services import asr 13 | from agent_cli.services.llm import process_and_update_clipboard 14 | from agent_cli.services.tts import handle_tts_playback 15 | 16 | if TYPE_CHECKING: 17 | from rich.live import Live 18 | 19 | from agent_cli import config 20 | 21 | LOGGER = logging.getLogger() 22 | 23 | 24 | async def get_instruction_from_audio( 25 | *, 26 | audio_data: bytes, 27 | provider_cfg: config.ProviderSelection, 28 | audio_input_cfg: config.AudioInput, 29 | wyoming_asr_cfg: config.WyomingASR, 30 | openai_asr_cfg: config.OpenAIASR, 31 | ollama_cfg: config.Ollama, 32 | logger: logging.Logger, 33 | quiet: bool, 34 | ) -> str | None: 35 | """Transcribe audio data and return the instruction.""" 36 | try: 37 | start_time = time.monotonic() 38 | transcriber = asr.create_recorded_audio_transcriber(provider_cfg) 39 | instruction = await transcriber( 40 | audio_data=audio_data, 41 | provider_cfg=provider_cfg, 42 | audio_input_cfg=audio_input_cfg, 43 | wyoming_asr_cfg=wyoming_asr_cfg, 44 | openai_asr_cfg=openai_asr_cfg, 45 | ollama_cfg=ollama_cfg, 46 | logger=logger, 47 | quiet=quiet, 48 | ) 49 | elapsed = time.monotonic() - start_time 50 | 51 | if not instruction or not instruction.strip(): 52 | if not quiet: 53 | print_with_style( 54 | "No speech detected in recording", 55 | style="yellow", 56 | ) 57 | return None 58 
| 59 | if not quiet: 60 | print_input_panel( 61 | instruction, 62 | title="🎯 Instruction", 63 | style="bold yellow", 64 | subtitle=f"[dim]took {elapsed:.2f}s[/dim]", 65 | ) 66 | 67 | return instruction 68 | 69 | except Exception as e: 70 | logger.exception("Failed to process audio with ASR") 71 | if not quiet: 72 | print_with_style(f"ASR processing failed: {e}", style="red") 73 | return None 74 | 75 | 76 | async def process_instruction_and_respond( 77 | *, 78 | instruction: str, 79 | original_text: str, 80 | provider_cfg: config.ProviderSelection, 81 | general_cfg: config.General, 82 | ollama_cfg: config.Ollama, 83 | openai_llm_cfg: config.OpenAILLM, 84 | gemini_llm_cfg: config.GeminiLLM, 85 | audio_output_cfg: config.AudioOutput, 86 | wyoming_tts_cfg: config.WyomingTTS, 87 | openai_tts_cfg: config.OpenAITTS, 88 | kokoro_tts_cfg: config.KokoroTTS, 89 | system_prompt: str, 90 | agent_instructions: str, 91 | live: Live | None, 92 | logger: logging.Logger, 93 | ) -> None: 94 | """Process instruction with LLM and handle TTS response.""" 95 | # Process with LLM if clipboard mode is enabled 96 | if general_cfg.clipboard: 97 | await process_and_update_clipboard( 98 | system_prompt=system_prompt, 99 | agent_instructions=agent_instructions, 100 | provider_cfg=provider_cfg, 101 | ollama_cfg=ollama_cfg, 102 | openai_cfg=openai_llm_cfg, 103 | gemini_cfg=gemini_llm_cfg, 104 | logger=logger, 105 | original_text=original_text, 106 | instruction=instruction, 107 | clipboard=general_cfg.clipboard, 108 | quiet=general_cfg.quiet, 109 | live=live, 110 | ) 111 | 112 | # Handle TTS response if enabled 113 | if audio_output_cfg.enable_tts: 114 | response_text = pyperclip.paste() 115 | if response_text and response_text.strip(): 116 | await handle_tts_playback( 117 | text=response_text, 118 | provider_cfg=provider_cfg, 119 | audio_output_cfg=audio_output_cfg, 120 | wyoming_tts_cfg=wyoming_tts_cfg, 121 | openai_tts_cfg=openai_tts_cfg, 122 | kokoro_tts_cfg=kokoro_tts_cfg, 123 | 
save_file=general_cfg.save_file, 124 | quiet=general_cfg.quiet, 125 | logger=logger, 126 | play_audio=not general_cfg.save_file, 127 | status_message="🔊 Speaking response...", 128 | description="TTS audio", 129 | live=live, 130 | ) 131 | -------------------------------------------------------------------------------- /docs/installation/macos.md: -------------------------------------------------------------------------------- 1 | # macOS Native Installation 2 | 3 | Native macOS setup with full Metal GPU acceleration for optimal performance. 4 | 5 | > **🍎 Recommended for macOS** 6 | > This setup provides ~10x better performance than Docker by utilizing Metal GPU acceleration. 7 | 8 | ## Prerequisites 9 | 10 | - macOS 12 Monterey or later 11 | - 8GB+ RAM (16GB+ recommended) 12 | - 10GB free disk space 13 | - Homebrew installed 14 | 15 | ## Quick Start 16 | 17 | 1. **Run the setup script:** 18 | 19 | ```bash 20 | scripts/setup-macos.sh 21 | ``` 22 | 23 | 2. **Start all services:** 24 | 25 | ```bash 26 | scripts/start-all-services.sh 27 | ``` 28 | 29 | 3. **Install agent-cli:** 30 | 31 | ```bash 32 | uv tool install agent-cli 33 | # or: pip install agent-cli 34 | ``` 35 | 36 | 4. 
**Test the setup:** 37 | ```bash 38 | agent-cli autocorrect "this has an eror" 39 | ``` 40 | 41 | ## What the Setup Does 42 | 43 | The `setup-macos.sh` script: 44 | 45 | - ✅ Checks for Homebrew 46 | - ✅ Installs `uv` if needed 47 | - ✅ Installs/checks Ollama (native macOS app) 48 | - ✅ Installs Zellij for session management 49 | - ✅ Prepares Wyoming service runners 50 | 51 | ## Services Overview 52 | 53 | | Service | Implementation | Port | GPU Support | 54 | | ---------------- | ---------------------- | ----- | -------------------- | 55 | | **Ollama** | Native macOS app | 11434 | ✅ Metal GPU | 56 | | **Whisper** | Wyoming MLX Whisper | 10300 | ✅ Apple Silicon MLX | 57 | | **Piper** | Wyoming Piper (via uv) | 10200 | N/A | 58 | | **OpenWakeWord** | Wyoming OpenWakeWord | 10400 | N/A | 59 | 60 | > **Note:** Whisper uses [wyoming-mlx-whisper](https://github.com/basnijholt/wyoming-mlx-whisper) with `whisper-large-v3-turbo` for near real-time transcription on Apple Silicon. 61 | 62 | ## Session Management with Zellij 63 | 64 | The setup uses Zellij for managing all services in one session: 65 | 66 | ### Starting Services 67 | 68 | ```bash 69 | scripts/start-all-services.sh 70 | ``` 71 | 72 | ### Zellij Commands 73 | 74 | - `Ctrl-O d` - Detach (services keep running) 75 | - `zellij attach agent-cli` - Reattach to session 76 | - `zellij list-sessions` - List all sessions 77 | - `zellij kill-session agent-cli` - Stop all services 78 | - `Alt + arrow keys` - Navigate between panes 79 | - `Ctrl-Q` - Quit (stops all services) 80 | 81 | ## Manual Service Management 82 | 83 | If you prefer running services individually: 84 | 85 | ```bash 86 | # Terminal 1: Ollama (native GPU acceleration) 87 | ollama serve 88 | 89 | # Terminal 2: Whisper (CPU optimized) 90 | scripts/run-whisper.sh 91 | 92 | # Terminal 3: Piper (Apple Silicon compatible) 93 | scripts/run-piper.sh 94 | 95 | # Terminal 4: OpenWakeWord (macOS compatible fork) 96 | scripts/run-openwakeword.sh 97 | ``` 98 | 99 | ## 
Why Native Setup? 100 | 101 | - **10x faster than Docker** - Full Metal GPU acceleration 102 | - **Better resource usage** - Native integration with macOS 103 | - **Automatic model management** - Services handle downloads 104 | 105 | ## Troubleshooting 106 | 107 | ### Terminal-notifier Popup Issues 108 | 109 | - Ensure Settings > Notifications > terminal-notifier > Allow Notifications is enabled. 110 | - For a persistent “Listening…” badge, set the Alert style to **Persistent** (or choose **Alerts** on macOS versions that still offer Alert/Banner). This keeps the recording indicator visible while other notifications still auto-dismiss. 111 | 112 | ### Ollama Issues 113 | 114 | ```bash 115 | # Check if Ollama is running 116 | ollama list 117 | 118 | # Pull a model manually 119 | ollama pull gemma3:4b 120 | 121 | # Check Ollama logs 122 | tail -f ~/.ollama/logs/server.log 123 | ``` 124 | 125 | ### Service Port Conflicts 126 | 127 | ```bash 128 | # Check what's using a port 129 | lsof -i :11434 130 | lsof -i :10300 131 | lsof -i :10200 132 | lsof -i :10400 133 | ``` 134 | 135 | ### uv/Python Issues 136 | 137 | ```bash 138 | # Reinstall uv 139 | brew reinstall uv 140 | 141 | # Check uv installation 142 | uv --version 143 | ``` 144 | 145 | ### Zellij Issues 146 | 147 | ```bash 148 | # Kill stuck sessions 149 | zellij kill-all-sessions 150 | 151 | # Check session status 152 | zellij list-sessions 153 | 154 | # Start without Zellij (manual) 155 | # Run each script in separate terminals 156 | ``` 157 | 158 | ### Memory/Performance Issues 159 | 160 | - Close other apps to free RAM 161 | - Check Activity Monitor for high CPU/Memory usage 162 | - Services will automatically download required models 163 | 164 | ## Alternative: Docker 165 | 166 | If you prefer Docker despite performance limitations: 167 | 168 | - [Docker Setup Guide](docker.md) 169 | - Note: ~10x slower due to no GPU acceleration 170 | 
-------------------------------------------------------------------------------- /agent_cli/memory/_prompt.py: -------------------------------------------------------------------------------- 1 | """Centralized prompts for memory LLM calls.""" 2 | 3 | FACT_SYSTEM_PROMPT = """ 4 | You are a memory extractor. From the latest exchange, return 1-3 concise fact sentences based ONLY on user messages. 5 | 6 | Guidelines: 7 | - If there is no meaningful fact, return []. 8 | - Ignore assistant/system content completely. 9 | - Facts must be short, readable sentences (e.g., "The user's wife is Anne.", "Planning a trip to Japan next spring."). 10 | - Do not return acknowledgements, questions, or meta statements; only factual statements from the user. 11 | - NEVER output refusals like "I cannot..." or "I don't know..." or "I don't have that information". If you can't extract a fact, return []. 12 | - Return a JSON list of strings. 13 | 14 | Few-shots: 15 | - Input: User: "Hi." / Assistant: "Hello" -> [] 16 | - Input: User: "My wife is Anne." / Assistant: "Got it." -> ["The user's wife is Anne."] 17 | - Input: User: "I like biking on weekends." / Assistant: "Cool!" -> ["User likes biking on weekends."] 18 | """.strip() 19 | 20 | FACT_INSTRUCTIONS = """ 21 | Return only factual sentences grounded in the user text. No assistant acknowledgements or meta-text. 22 | """.strip() 23 | 24 | UPDATE_MEMORY_PROMPT = """You are a smart memory manager which controls the memory of a system. 25 | You can perform four operations: (1) ADD into the memory, (2) UPDATE the memory, (3) DELETE from the memory, and (4) NONE (no change). 26 | 27 | Compare new facts with existing memory. 
For each new fact, decide whether to: 28 | - ADD: Add it to the memory as a new element (new information not present in any existing memory) 29 | - UPDATE: Update an existing memory element (only if facts are about THE SAME TOPIC, e.g., both about pizza preferences) 30 | - DELETE: Delete an existing memory element (if new fact explicitly contradicts it) 31 | - NONE: Make no change (if fact is already present, a duplicate, or the existing memory is unrelated to new facts) 32 | 33 | **Guidelines:** 34 | 35 | 1. **ADD**: If the new fact contains new information not present in any existing memory, add it with a new ID. 36 | - Existing unrelated memories should have event "NONE". 37 | - **Example**: 38 | - Current memory: [{"id": 0, "text": "User is a software engineer"}] 39 | - New facts: ["Name is John"] 40 | - Output: [ 41 | {"id": 0, "text": "User is a software engineer", "event": "NONE"}, 42 | {"id": 1, "text": "Name is John", "event": "ADD"} 43 | ] 44 | 45 | 2. **UPDATE**: Only if the new fact refines/expands an existing memory about THE SAME TOPIC. 46 | - Keep the same ID, update the text. 47 | - Example: "User likes pizza" + "User loves pepperoni pizza" → UPDATE (same topic: pizza) 48 | - Example: "Met Sarah today" + "Went running" → NOT same topic, do NOT update! 49 | - **Example**: 50 | - Current memory: [{"id": 0, "text": "User likes pizza"}] 51 | - New facts: ["User loves pepperoni pizza"] 52 | - Output: [{"id": 0, "text": "User loves pepperoni pizza", "event": "UPDATE"}] 53 | 54 | 3. **DELETE**: If the new fact explicitly contradicts an existing memory. 55 | - **Example**: 56 | - Current memory: [{"id": 0, "text": "Loves pizza"}, {"id": 1, "text": "Name is John"}] 57 | - New facts: ["Hates pizza"] 58 | - Output: [ 59 | {"id": 0, "text": "Loves pizza", "event": "DELETE"}, 60 | {"id": 1, "text": "Name is John", "event": "NONE"}, 61 | {"id": 2, "text": "Hates pizza", "event": "ADD"} 62 | ] 63 | 64 | 4. 
**NONE**: If the new fact is already present or existing memory is unrelated to new facts. 65 | - **Example**: 66 | - Current memory: [{"id": 0, "text": "Name is John"}] 67 | - New facts: ["Name is John"] 68 | - Output: [{"id": 0, "text": "Name is John", "event": "NONE"}] 69 | 70 | 5. **IMPORTANT - Unrelated topics example**: 71 | - Current memory: [{"id": 0, "text": "Met Sarah to discuss quantum computing"}] 72 | - New facts: ["Went for a 5km run"] 73 | - These are COMPLETELY DIFFERENT topics (meeting vs running). Do NOT use UPDATE! 74 | - Output: [ 75 | {"id": 0, "text": "Met Sarah to discuss quantum computing", "event": "NONE"}, 76 | {"id": 1, "text": "Went for a 5km run", "event": "ADD"} 77 | ] 78 | 79 | **CRITICAL RULES:** 80 | - You MUST return ALL memories (existing + new) in your response. 81 | - Each existing memory MUST have an event (NONE, UPDATE, or DELETE). 82 | - Each genuinely NEW fact (not related to any existing memory) MUST be ADDed with a new ID. 83 | - Do NOT use UPDATE for unrelated topics! "Met Sarah" and "Went running" are DIFFERENT topics → use NONE for existing + ADD for new. 84 | 85 | Return ONLY a JSON list. No prose or code fences.""".strip() 86 | 87 | SUMMARY_PROMPT = """ 88 | You are a concise conversation summarizer. Update the running summary with the new facts. 89 | Keep it brief, factual, and focused on durable information; do not restate transient chit-chat. 90 | Prefer aggregating related facts into compact statements; drop redundancies. 91 | """.strip() 92 | --------------------------------------------------------------------------------