├── docpixie
    ├── py.typed
    ├── core
    │   ├── __init__.py
    │   ├── utils.py
    │   └── config.py
    ├── cli
    │   ├── __init__.py
    │   ├── widgets
    │   │   ├── __init__.py
    │   │   └── command_palette.py
    │   ├── styles.py
    │   ├── task_display.py
    │   ├── commands.py
    │   ├── config.py
    │   ├── state_manager.py
    │   ├── event_handlers.py
    │   ├── docpixie_manager.py
    │   ├── legacy.py
    │   └── conversation_storage.py
    ├── ai
    │   ├── __init__.py
    │   ├── query_classifier.py
    │   ├── query_reformulator.py
    │   ├── summarizer.py
    │   ├── page_selector.py
    │   ├── synthesizer.py
    │   └── context_processor.py
    ├── utils
    │   ├── __init__.py
    │   └── async_helpers.py
    ├── models
    │   ├── __init__.py
    │   ├── agent.py
    │   └── document.py
    ├── storage
    │   ├── __init__.py
    │   ├── base.py
    │   └── memory.py
    ├── processors
    │   ├── __init__.py
    │   ├── base.py
    │   ├── factory.py
    │   ├── image.py
    │   └── pdf.py
    ├── providers
    │   ├── __init__.py
    │   ├── factory.py
    │   ├── base.py
    │   ├── openai.py
    │   ├── openrouter.py
    │   └── anthropic.py
    ├── __init__.py
    ├── cli.py
    └── exceptions.py
├── screenshot.png
├── setup.py
├── MANIFEST.in
├── requirements.txt
├── LICENSE
├── pyproject.toml
├── .gitignore
├── README.md
├── CLAUDE.md
└── docs
    └── cli-tool.md


/docpixie/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561


--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qnguyen3/docpixie/HEAD/screenshot.png


--------------------------------------------------------------------------------
/docpixie/core/__init__.py:
--------------------------------------------------------------------------------
1 | """Core DocPixie components"""
2 | 
3 | from .config import DocPixieConfig
4 | 
5 | __all__ = ["DocPixieConfig"]


--------------------------------------------------------------------------------
/docpixie/cli/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | DocPixie CLI - Terminal User Interface for document chat
3 | """
4 | 
5 | from .app import main
6 | 
7 | __all__ = ["main"]


--------------------------------------------------------------------------------
/docpixie/ai/__init__.py:
--------------------------------------------------------------------------------
1 | """AI operations and business logic components"""
2 | 
3 | from .summarizer import PageSummarizer
4 | 
5 | __all__ = [
6 |     "PageSummarizer"
7 | ]


--------------------------------------------------------------------------------
/docpixie/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utility functions and helpers"""
2 | 
3 | from .async_helpers import sync_wrapper, ensure_async
4 | 
5 | __all__ = [
6 |     "sync_wrapper",
7 |     "ensure_async"
8 | ]


--------------------------------------------------------------------------------
/docpixie/models/__init__.py:
--------------------------------------------------------------------------------
1 | """Document models and data structures"""
2 | 
3 | from .document import Document, Page, QueryResult, QueryMode
4 | 
5 | __all__ = ["Document", "Page", "QueryResult", "QueryMode"]


--------------------------------------------------------------------------------
/docpixie/storage/__init__.py:
--------------------------------------------------------------------------------
 1 | """Storage backends for documents and metadata"""
 2 | 
 3 | from .base import BaseStorage
 4 | from .local import LocalStorage
 5 | from .memory import InMemoryStorage
 6 | 
 7 | __all__ = [
 8 |     "BaseStorage",
 9 |     "LocalStorage",
10 |     "InMemoryStorage"
11 | ]


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Setup script for DocPixie package.
 4 | This file exists for backward compatibility with older pip versions.
 5 | The actual configuration is in pyproject.toml.
 6 | """
 7 | 
 8 | from setuptools import setup
 9 | 
10 | if __name__ == "__main__":
11 |     setup()


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.md
 2 | include LICENSE
 3 | include requirements.txt
 4 | recursive-include docpixie *.py
 5 | include docpixie/py.typed
 6 | recursive-include docs *.md
 7 | recursive-exclude * __pycache__
 8 | recursive-exclude * *.py[co]
 9 | recursive-exclude tests *
10 | recursive-exclude documents *
11 | recursive-exclude docpixie_data *


--------------------------------------------------------------------------------
/docpixie/processors/__init__.py:
--------------------------------------------------------------------------------
 1 | """Document processors for different file types"""
 2 | 
 3 | from .base import BaseProcessor
 4 | from .pdf import PDFProcessor
 5 | from .image import ImageProcessor
 6 | from .factory import ProcessorFactory
 7 | 
 8 | __all__ = [
 9 |     "BaseProcessor",
10 |     "PDFProcessor", 
11 |     "ImageProcessor",
12 |     "ProcessorFactory"
13 | ]


--------------------------------------------------------------------------------
/docpixie/providers/__init__.py:
--------------------------------------------------------------------------------
 1 | """Vision AI providers for DocPixie"""
 2 | 
 3 | from .base import BaseProvider
 4 | from .openai import OpenAIProvider
 5 | from .anthropic import AnthropicProvider
 6 | from .openrouter import OpenRouterProvider
 7 | from .factory import create_provider
 8 | 
 9 | __all__ = [
10 |     "BaseProvider",
11 |     "OpenAIProvider", 
12 |     "AnthropicProvider",
13 |     "OpenRouterProvider",
14 |     "create_provider"
15 | ]


--------------------------------------------------------------------------------
/docpixie/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DocPixie - Simplified Multimodal RAG Library
 3 | 
 4 | A lightweight, vision-based document question-answering system
 5 | that doesn't require vector databases or embedding models.
 6 | """
 7 | 
 8 | __version__ = "0.1.0"
 9 | 
10 | from .docpixie import DocPixie
11 | from .models.document import Document, Page, QueryResult, QueryMode
12 | from .models.agent import ConversationMessage
13 | from .core.config import DocPixieConfig
14 | from .providers import BaseProvider, create_provider
15 | 
16 | __all__ = [
17 |     "DocPixie",
18 |     "Document",
19 |     "Page", 
20 |     "QueryResult",
21 |     "QueryMode",
22 |     "ConversationMessage",
23 |     "DocPixieConfig",
24 |     "BaseProvider",
25 |     "create_provider"
26 | ]


--------------------------------------------------------------------------------
/docpixie/cli.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | DocPixie CLI - Modern terminal interface for document chat
 4 | """
 5 | 
 6 | import sys
 7 | 
 8 | 
 9 | def main():
10 |     """Main entry point for DocPixie CLI"""
11 |     try:
12 |         # Try to import and use the new Textual CLI
13 |         from docpixie.cli.app import main as textual_main
14 |         textual_main()
15 |     except ImportError as e:
16 |         # Fallback to legacy CLI if Textual is not installed
17 |         print("Note: Textual not installed. Using legacy CLI.")
18 |         print("Install with: pip install textual>=0.47.0")
19 |         print("")
20 |         
21 |         from docpixie.cli.legacy import main as legacy_main
22 |         legacy_main()
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/docpixie/cli/widgets/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DocPixie CLI Widgets
 3 | """
 4 | 
 5 | from .command_palette import DocPixieCommandPalette as CommandPalette, CommandSelected, CommandAutoComplete
 6 | from .conversation_manager import ConversationManagerDialog, ConversationSelected, ConversationDeleted
 7 | from .model_selector import ModelSelectorDialog, ModelSelected
 8 | from .document_manager import DocumentManagerDialog, DocumentRemoved, DocumentsIndexed
 9 | from .chat_area import ChatArea
10 | 
11 | __all__ = [
12 |     "CommandPalette", "CommandSelected", "CommandAutoComplete",
13 |     "ConversationManagerDialog", "ConversationSelected", "ConversationDeleted",
14 |     "ModelSelectorDialog", "ModelSelected",
15 |     "DocumentManagerDialog", "DocumentRemoved", "DocumentsIndexed",
16 |     "ChatArea"
17 | ]


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # DocPixie Open Source Library Dependencies
 2 | 
 3 | # Core dependencies
 4 | Pillow>=10.0.0          # Image processing
 5 | PyMuPDF>=1.23.0         # PDF processing (replaces pdf2image)
 6 | 
 7 | # Optional AI provider dependencies
 8 | openai>=1.0.0           # OpenAI GPT-4V (optional)
 9 | anthropic>=0.10.0       # Anthropic Claude (optional)
10 | 
11 | # CLI dependencies
12 | textual>=0.47.0         # Terminal UI framework
13 | textual-dev>=1.3.0      # Development tools for Textual
14 | pyfiglet>=0.8.0         # ASCII art text generation
15 | 
16 | # Development and testing
17 | pytest>=7.0.0           # Testing framework
18 | pytest-asyncio>=0.21.0  # Async testing support
19 | 
20 | # Optional dependencies for different storage backends
21 | # boto3>=1.28.0         # AWS S3 support (optional)
22 | # azure-storage-blob    # Azure Blob support (optional)


--------------------------------------------------------------------------------
/docpixie/core/utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Core utility functions for DocPixie
 3 | """
 4 | import re
 5 | 
 6 | 
 7 | def sanitize_llm_json(response: str) -> str:
 8 |     """
 9 |     Sanitize JSON response from LLM by removing markdown code blocks and extra whitespace.
10 |     
11 |     LLMs sometimes wrap JSON responses with markdown code blocks like:
12 |     ```json
13 |     {"key": "value"}
14 |     ```
15 |     
16 |     This function strips those wrappers and returns clean JSON.
17 |     
18 |     Args:
19 |         response: Raw response string from LLM
20 |         
21 |     Returns:
22 |         Sanitized JSON string ready for json.loads()
23 |     """
24 |     # Strip leading/trailing whitespace
25 |     cleaned = response.strip()
26 |     
27 |     # Remove markdown code block wrappers
28 |     # Matches ```json...``` or ```...``` patterns
29 |     code_block_pattern = r'^```(?:json)?\s*\n?(.*?)\n?```$'
30 |     match = re.match(code_block_pattern, cleaned, re.DOTALL | re.IGNORECASE)
31 |     
32 |     if match:
33 |         cleaned = match.group(1).strip()
34 |     
35 |     return cleaned


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 DocPixie Team
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/docpixie/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Custom exceptions for DocPixie RAG Agent
 3 | """
 4 | 
 5 | 
 6 | class DocPixieError(Exception):
 7 |     """Base exception for DocPixie errors"""
 8 |     pass
 9 | 
10 | 
11 | class ContextProcessingError(DocPixieError):
12 |     """Error occurred during conversation context processing"""
13 |     pass
14 | 
15 | 
16 | class QueryReformulationError(DocPixieError):
17 |     """Error occurred during query reformulation"""
18 |     pass
19 | 
20 | 
21 | class QueryClassificationError(DocPixieError):
22 |     """Error occurred during query classification"""
23 |     pass
24 | 
25 | 
26 | class TaskPlanningError(DocPixieError):
27 |     """Error occurred during task planning or document selection"""
28 |     pass
29 | 
30 | 
31 | class PageSelectionError(DocPixieError):
32 |     """Error occurred during page selection"""
33 |     pass
34 | 
35 | 
36 | class TaskAnalysisError(DocPixieError):
37 |     """Error occurred during task analysis"""
38 |     pass
39 | 
40 | 
41 | class ResponseSynthesisError(DocPixieError):
42 |     """Error occurred during response synthesis"""
43 |     pass
44 | 
45 | 
46 | class DocumentSelectionError(DocPixieError):
47 |     """Error occurred during document selection"""
48 |     pass
49 | 
50 | 
51 | class PlanUpdateError(DocPixieError):
52 |     """Error occurred during adaptive plan updates"""
53 |     pass


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "docpixie"
 7 | version = "0.1.0"
 8 | description = "A lightweight, vision-based document question-answering system"
 9 | readme = "README.md"
10 | license = "MIT"
11 | authors = [
12 |     {name = "DocPixie Team"},
13 | ]
14 | maintainers = [
15 |     {name = "DocPixie Team"},
16 | ]
17 | classifiers = [
18 |     "Development Status :: 3 - Alpha",
19 |     "Intended Audience :: Developers",
20 |     "Programming Language :: Python :: 3",
21 |     "Programming Language :: Python :: 3.8",
22 |     "Programming Language :: Python :: 3.9",
23 |     "Programming Language :: Python :: 3.10",
24 |     "Programming Language :: Python :: 3.11",
25 |     "Programming Language :: Python :: 3.12",
26 |     "Topic :: Software Development :: Libraries :: Python Modules",
27 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
28 | ]
29 | requires-python = ">=3.8"
30 | dependencies = [
31 |     "Pillow>=10.0.0",
32 |     "PyMuPDF>=1.23.0",
33 |     "openai>=1.0.0",
34 |     "anthropic>=0.10.0",
35 |     "textual>=0.47.0",
36 |     "textual-dev>=1.3.0",
37 |     "pyfiglet>=0.8.0",
38 | ]
39 | 
40 | [project.optional-dependencies]
41 | dev = [
42 |     "pytest>=7.0.0",
43 |     "pytest-asyncio>=0.21.0",
44 |     "build>=1.0.0",
45 |     "twine>=4.0.0",
46 | ]
47 | 
48 | [project.urls]
49 | Homepage = "https://github.com/qnguyen3/docpixie"
50 | Documentation = "https://github.com/qnguyen3/docpixie#readme"
51 | Repository = "https://github.com/qnguyen3/docpixie.git"
52 | Issues = "https://github.com/qnguyen3/docpixie/issues"
53 | 
54 | [project.scripts]
55 | docpixie = "docpixie.cli:main"
56 | 
57 | [tool.setuptools]
58 | packages = ["docpixie", "docpixie.ai", "docpixie.cli", "docpixie.core", "docpixie.models", "docpixie.processors", "docpixie.providers", "docpixie.storage", "docpixie.utils"]
59 | 
60 | [tool.setuptools.package-data]
61 | docpixie = ["py.typed"]


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .nox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | *.py,cover
 48 | .hypothesis/
 49 | .pytest_cache/
 50 | cover/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | db.sqlite3
 60 | db.sqlite3-journal
 61 | 
 62 | # Flask stuff:
 63 | instance/
 64 | .webassets-cache
 65 | 
 66 | # Scrapy stuff:
 67 | .scrapy
 68 | 
 69 | # Sphinx documentation
 70 | docs/_build/
 71 | 
 72 | # PyBuilder
 73 | .pybuilder/
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # IPython
 80 | profile_default/
 81 | ipython_config.py
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # pipenv
 87 | Pipfile.lock
 88 | 
 89 | # poetry
 90 | poetry.lock
 91 | 
 92 | # pdm
 93 | .pdm.toml
 94 | 
 95 | # PEP 582
 96 | __pypackages__/
 97 | 
 98 | # Celery stuff
 99 | celerybeat-schedule
100 | celerybeat.pid
101 | 
102 | # SageMath parsed files
103 | *.sage.py
104 | 
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 | 
129 | # Pyre type checker
130 | .pyre/
131 | 
132 | # pytype static type analyzer
133 | .pytype/
134 | 
135 | # Cython debug symbols
136 | cython_debug/
137 | 
138 | # VS Code
139 | .vscode/
140 | 
141 | # PyCharm
142 | .idea/
143 | 
144 | # macOS
145 | .DS_Store
146 | 
147 | # DocPixie specific
148 | docpixie_data/
149 | documents/*.pdf
150 | *.pdf.json
151 | 
152 | # Claude AI
153 | .claude/


--------------------------------------------------------------------------------
/docpixie/utils/async_helpers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Async/sync compatibility helpers
 3 | """
 4 | 
 5 | import asyncio
 6 | import threading
 7 | from typing import Any, Awaitable, TypeVar
 8 | from functools import wraps
 9 | 
10 | T = TypeVar('T')
11 | 
12 | 
def sync_wrapper(coro: Awaitable[T]) -> T:
    """
    Run an awaitable to completion from synchronous code.

    Handles both cases: when an event loop is already running in the calling
    thread the awaitable is executed on a fresh loop in a helper thread
    (the caller blocks until it finishes); otherwise ``asyncio.run`` is used.

    Args:
        coro: The awaitable to execute

    Returns:
        Whatever the awaitable returns

    Raises:
        Any exception raised by the awaitable, unchanged
    """
    try:
        # Only the loop probe is guarded. If the coroutine itself raised
        # RuntimeError inside this ``try`` it would be mistaken for
        # "no running loop" and the real error would be masked by a
        # second, failing asyncio.run() call.
        asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop, safe to use asyncio.run
        return asyncio.run(coro)

    # We're in an async context, need to run in a new thread
    return _run_in_thread(coro)


def _run_in_thread(coro: Awaitable[T]) -> T:
    """Run coroutine in a separate thread with its own event loop

    Blocks the calling thread until the worker finishes, then returns the
    coroutine's result or re-raises whatever it raised.
    """
    outcome = {"value": None, "exception": None}

    def thread_target():
        try:
            # Create new event loop for this thread. Created before the inner
            # ``try`` so ``finally`` never touches an unbound name.
            new_loop = asyncio.new_event_loop()
            try:
                asyncio.set_event_loop(new_loop)
                outcome["value"] = new_loop.run_until_complete(coro)
            finally:
                new_loop.close()
        except BaseException as exc:
            # BaseException on purpose: CancelledError / SystemExit raised in
            # the worker would otherwise vanish and the caller would silently
            # receive None.
            outcome["exception"] = exc

    worker = threading.Thread(target=thread_target)
    worker.start()
    worker.join()

    if outcome["exception"] is not None:
        raise outcome["exception"]

    return outcome["value"]
51 | 
52 | 
def ensure_async(func):
    """
    Decorator to ensure function is async-compatible

    Coroutine functions are returned unchanged. A sync function is wrapped
    in a coroutine that runs it in the running loop's default executor.

    Args:
        func: Sync or async callable

    Returns:
        ``func`` itself if it is already a coroutine function, otherwise an
        async wrapper with the same name/docstring (via ``functools.wraps``)
    """
    if asyncio.iscoroutinefunction(func):
        return func

    @wraps(func)
    async def async_wrapper(*args, **kwargs):
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the documented way to obtain it.
        loop = asyncio.get_running_loop()
        # run_in_executor forwards positional args only, hence the closure
        # to carry keyword arguments.
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    return async_wrapper
67 | 
68 | 
def make_sync_version(async_func):
    """
    Build a blocking counterpart of ``async_func``.

    The returned callable accepts the same arguments, creates the coroutine
    and hands it to ``sync_wrapper`` to be run to completion.
    """
    @wraps(async_func)
    def sync_version(*args, **kwargs):
        return sync_wrapper(async_func(*args, **kwargs))

    return sync_version


--------------------------------------------------------------------------------
/docpixie/providers/factory.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Provider factory for creating AI vision providers
 3 | """
 4 | 
from typing import List, Union
 6 | 
 7 | from .base import BaseProvider
 8 | from .openai import OpenAIProvider
 9 | from .anthropic import AnthropicProvider
10 | from .openrouter import OpenRouterProvider
11 | from ..core.config import DocPixieConfig
12 | 
13 | 
def create_provider(config: DocPixieConfig) -> BaseProvider:
    """
    Instantiate the AI provider named by ``config.provider``

    Args:
        config: DocPixie configuration

    Returns:
        Provider instance constructed with ``config``

    Raises:
        ValueError: If ``config.provider`` is not a supported name
    """
    known_providers = (
        ("openai", OpenAIProvider),
        ("anthropic", AnthropicProvider),
        ("openrouter", OpenRouterProvider),
    )

    for provider_name, provider_cls in known_providers:
        if config.provider == provider_name:
            return provider_cls(config)

    raise ValueError(f"Unsupported provider: {config.provider}")
35 | 
36 | 
def get_available_providers() -> List[str]:
    """Get list of available provider names

    Returns:
        A new list on every call, so callers may mutate it freely
    """
    # typing.List rather than list[str]: the annotation is evaluated when the
    # function is defined, and subscripting the builtin raises TypeError on
    # Python 3.8, which this package declares as supported.
    return ["openai", "anthropic", "openrouter"]
40 | 
41 | 
def validate_provider_config(provider: str, config: DocPixieConfig) -> bool:
    """
    Check that ``config`` carries what the named provider needs

    Args:
        provider: Provider name
        config: Configuration to validate

    Returns:
        True if configuration is valid

    Raises:
        ValueError: If the provider is unknown, its API key is missing,
            or no vision model is set
    """
    if provider not in get_available_providers():
        raise ValueError(f"Unknown provider: {provider}")

    # (provider name, config attribute holding its key, error when missing)
    key_requirements = (
        ("openai", "openai_api_key", "OpenAI API key is required"),
        ("anthropic", "anthropic_api_key", "Anthropic API key is required"),
        ("openrouter", "openrouter_api_key", "OpenRouter API key is required"),
    )

    for provider_name, key_attr, missing_key_error in key_requirements:
        if provider == provider_name:
            if not getattr(config, key_attr):
                raise ValueError(missing_key_error)
            if not config.vision_model:
                raise ValueError("Vision model is required")
            return True

    return False


--------------------------------------------------------------------------------
/docpixie/ai/query_classifier.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Query Classifier - Determines if queries need document retrieval
 3 | """
 4 | 
 5 | import json
 6 | import logging
 7 | 
 8 | from ..providers.base import BaseProvider
 9 | from ..exceptions import QueryClassificationError
10 | from ..core.utils import sanitize_llm_json
11 | from .prompts import QUERY_CLASSIFICATION_PROMPT, SYSTEM_QUERY_CLASSIFIER
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
class QueryClassifier:
    """
    Classifies queries to determine processing strategy

    Key classification:
    - needs_documents: Whether query requires document retrieval
    """

    def __init__(self, provider: BaseProvider):
        # Provider used to send the classification prompt
        self.provider = provider

    async def classify_query(self, query: str) -> dict:
        """
        Classify a query to determine processing approach

        Args:
            query: The user's query (potentially reformulated)

        Returns:
            Dict with classification results:
            {
                "reasoning": "explanation",
                "needs_documents": bool
            }

        Raises:
            QueryClassificationError: If the provider call fails, the
                response is not valid JSON, or required fields are missing
        """
        try:
            # Build classification prompt
            prompt = QUERY_CLASSIFICATION_PROMPT.format(query=query)

            messages_for_api = [
                {"role": "system", "content": SYSTEM_QUERY_CLASSIFIER},
                {"role": "user", "content": prompt}
            ]

            response = await self.provider.process_text_messages(
                messages=messages_for_api,
                max_tokens=1024,
                temperature=0.1
            )

            # Parse JSON response
            try:
                result = json.loads(sanitize_llm_json(response))
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse classification JSON: {response}")
                raise QueryClassificationError(
                    f"Invalid JSON response from classification: {e}"
                ) from e

            # Validate required fields (valid JSON may still be a list,
            # string or number rather than an object)
            if (
                not isinstance(result, dict)
                or "reasoning" not in result
                or "needs_documents" not in result
            ):
                logger.error(f"Classification response missing required fields: {result}")
                raise QueryClassificationError(
                    f"Missing required fields in classification response: {result}"
                )

            logger.info(f"Query classified: needs_documents={result['needs_documents']}, "
                       f"reasoning='{result['reasoning']}'")

            return result

        except QueryClassificationError:
            # Already specific and already logged: re-raise unchanged instead
            # of wrapping it a second time in the generic handler below.
            raise
        except Exception as e:
            logger.error(f"Query classification failed: {e}")
            raise QueryClassificationError(f"Failed to classify query: {e}") from e
83 | 


--------------------------------------------------------------------------------
/docpixie/cli/styles.py:
--------------------------------------------------------------------------------
  1 | """
  2 | CSS styles for DocPixie CLI components
  3 | """
  4 | 
  5 | SETUP_SCREEN_CSS = """
  6 | SetupScreen {
  7 |     align: center middle;
  8 | }
  9 | 
 10 | #setup-container {
 11 |     width: 60;
 12 |     height: auto;
 13 |     padding: 1 2;
 14 |     background: #2d1f2d;
 15 |     border: solid #ff99cc;
 16 | }
 17 | 
 18 | #setup-container > .title {
 19 |     color: #ff99cc;
 20 | }
 21 | 
 22 | #setup-hint, .setup-text {
 23 |     color: #bda6b6;
 24 | }
 25 | 
 26 | #api-input {
 27 |     margin: 1 0;
 28 |     background: #2d1f2d;
 29 |     border: solid #ff99cc;
 30 | }
 31 | """
 32 | 
 33 | MAIN_APP_CSS = """
 34 | #chat-container {
 35 |     height: 100%;
 36 |     layout: vertical;
 37 |     background: #2d1f2d;
 38 |     padding: 0 1 1 1;
 39 | }
 40 | 
 41 | #chat-log {
 42 |     border: solid #4a3344;
 43 |     background: #2d1f2d;
 44 | }
 45 | 
 46 | #input-container {
 47 |     height: auto;
 48 |     min-height: 3;
 49 |     max-height: 12;
 50 |     padding: 0 0 0 1;
 51 |     margin: 0;
 52 |     background: #2d1f2d;
 53 |     border: solid #ff99cc;
 54 | }
 55 | 
 56 | #prompt-indicator {
 57 |     width: 2;
 58 |     color: #ff99cc;
 59 |     padding: 0;
 60 |     background: #2d1f2d;
 61 |     margin: 0;
 62 | }
 63 | 
 64 | #chat-input {
 65 |     background: #2d1f2d;
 66 |     min-height: 1;
 67 |     max-height: 10;
 68 |     height: auto;
 69 |     border: none;
 70 |     padding: 0;
 71 |     margin: 0;
 72 |     scrollbar-background: #2d1f2d;
 73 |     scrollbar-color: #ff99cc;
 74 |     scrollbar-size: 1 1;
 75 | }
 76 | 
 77 | #chat-input:focus {
 78 |     border: none;
 79 | }
 80 | 
 81 | #chat-input > .text-area--scrollbar {
 82 |     background: #2d1f2d;
 83 | }
 84 | 
 85 | #chat-input > ScrollableContainer {
 86 |     background: #2d1f2d;
 87 | }
 88 | 
 89 | ChatInput {
 90 |     background: #2d1f2d !important;
 91 | }
 92 | 
 93 | ChatInput > .text-area--scrollbar {
 94 |     background: #2d1f2d;
 95 | }
 96 | 
 97 | ChatInput .text-area--cursor-line {
 98 |     background: #2d1f2d;
 99 | }
100 | 
101 | #chat-input .text-area--document {
102 |     background: #2d1f2d;
103 | }
104 | 
105 | #chat-input .text-area--selection {
106 |     background: #4a3344;
107 | }
108 | 
109 | #chat-input .text-area--cursor {
110 |     background: #ff99cc;
111 | }
112 | 
113 | #input-hint {
114 |     height: 1;
115 |     color: #bda6b6;
116 |     background: #2d1f2d;
117 |     padding: 0 1;
118 |     margin: 0;
119 | }
120 | 
121 | #status-bar {
122 |     height: 1;
123 |     background: #2d1f2d;
124 |     color: $text;
125 |     padding: 0 1;
126 | }
127 | 
128 | .user-message {
129 |     color: $success;
130 |     margin: 0 0 1 0;
131 | }
132 | 
133 | .assistant-message {
134 |     color: $primary;
135 |     margin: 0 0 1 0;
136 | }
137 | 
138 | .task-update {
139 |     color: $warning;
140 |     margin: 0 0 1 0;
141 | }
142 | 
143 | .error-message {
144 |     color: $error;
145 |     margin: 0 0 1 0;
146 | }
147 | """
148 | 


--------------------------------------------------------------------------------
/docpixie/processors/base.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Base processor interface for document processing
 3 | """
 4 | 
 5 | from abc import ABC, abstractmethod
 6 | from typing import List, Optional
 7 | from pathlib import Path
 8 | import logging
 9 | 
10 | from ..models.document import Document, Page
11 | from ..core.config import DocPixieConfig
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
class BaseProcessor(ABC):
    """Abstract foundation shared by all document processors.

    Subclasses implement ``supports`` and ``process``; the underscore-prefixed
    helpers here cover document construction, ID generation and basic file
    validation.
    """

    def __init__(self, config: DocPixieConfig):
        # Stored so subclasses can consult the configuration later.
        self.config = config

    @abstractmethod
    def supports(self, file_path: str) -> bool:
        """Return True when this processor can handle ``file_path``."""

    @abstractmethod
    async def process(self, file_path: str, document_id: Optional[str] = None) -> Document:
        """
        Turn a document file into a ``Document`` made of pages.

        Args:
            file_path: Location of the file to process
            document_id: Caller-supplied document ID, if any

        Returns:
            Document holding the processed pages
        """

    def get_supported_extensions(self) -> List[str]:
        """Return the file extensions handled; the base class handles none."""
        return []

    def _create_document(
        self,
        file_path: str,
        pages: List[Page],
        document_id: Optional[str] = None
    ) -> Document:
        """Assemble a ``Document`` for ``file_path`` from already processed pages."""
        source = Path(file_path)

        # A falsy document_id falls back to the ID derived from the path.
        resolved_id = document_id or self._generate_document_id(file_path)

        # Size is recorded as 0 when the file is no longer present.
        if source.exists():
            size_in_bytes = source.stat().st_size
        else:
            size_in_bytes = 0

        details = {
            'original_file': file_path,
            'processor': self.__class__.__name__,
            'file_size': size_in_bytes
        }
        return Document(
            id=resolved_id,
            name=source.stem,
            pages=pages,
            metadata=details
        )

    def _generate_document_id(self, file_path: str) -> str:
        """Return the hex MD5 digest of the path string (not of the file contents)."""
        import hashlib
        path_digest = hashlib.md5(file_path.encode())
        return path_digest.hexdigest()

    def _validate_file(self, file_path: str) -> None:
        """
        Ensure ``file_path`` names an existing, non-empty regular file.

        Raises:
            FileNotFoundError: If nothing exists at the path
            ValueError: If the path is not a file, or the file has zero size
        """
        target = Path(file_path)
        if not target.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        if not target.is_file():
            raise ValueError(f"Path is not a file: {file_path}")
        if target.stat().st_size == 0:
            raise ValueError(f"File is empty: {file_path}")
79 | 
80 | 
class ProcessingError(Exception):
    """Error raised while processing a document.

    Attributes set on the instance:
        file_path: Path of the file being processed when the error occurred
        page_number: Page involved, or None when no page was given
    """

    def __init__(self, message: str, file_path: str, page_number: Optional[int] = None):
        # Only the message goes to Exception; the context is kept as attributes.
        super().__init__(message)
        self.page_number = page_number
        self.file_path = file_path


--------------------------------------------------------------------------------
/docpixie/ai/query_reformulator.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Query Reformulator - Creates optimized search queries from conversation context
 3 | """
 4 | 
 5 | import json
 6 | import logging
 7 | 
 8 | from ..providers.base import BaseProvider
 9 | from ..exceptions import QueryReformulationError
10 | from ..core.utils import sanitize_llm_json
11 | from .prompts import QUERY_REFORMULATION_PROMPT, SYSTEM_QUERY_REFORMULATOR
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
class QueryReformulator:
    """
    Reformulates queries by resolving references for better search

    Focuses on:
    - Resolving pronouns and references (e.g., "it", "this", "that")
    - Keeping queries concise and focused on current intent
    - NOT combining multiple questions or intents
    - Maintaining optimal length for search
    """

    def __init__(self, provider: BaseProvider):
        # Provider used for the text-only reformulation call.
        self.provider = provider

    async def reformulate_with_context(
        self,
        current_query: str,
        conversation_context: str
    ) -> str:
        """
        Reformulate query by resolving references while keeping it concise

        Args:
            current_query: The current user query
            conversation_context: Processed context from ContextProcessor

        Returns:
            Reformulated query with resolved references. Falls back to
            ``current_query`` when the model's JSON has no usable
            ``reformulated_query`` string.

        Raises:
            QueryReformulationError: If the provider call fails, the response
                is not valid JSON, or the JSON is not an object
        """
        try:
            # Build prompt using existing template
            prompt = QUERY_REFORMULATION_PROMPT.format(
                conversation_context=conversation_context,
                recent_topics="",  # Let AI extract topics from context
                current_query=current_query
            )

            messages_for_api = [
                {"role": "system", "content": SYSTEM_QUERY_REFORMULATOR},
                {"role": "user", "content": prompt}
            ]

            response = await self.provider.process_text_messages(
                messages=messages_for_api,
                max_tokens=8192,
                temperature=0.2
            )

            # Parse JSON response
            try:
                result = json.loads(sanitize_llm_json(response))
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse reformulation JSON: {response}")
                raise QueryReformulationError(
                    f"Invalid JSON response from reformulation: {e}"
                ) from e

            # Valid JSON that is not an object (list, string, number) has no
            # "reformulated_query" key to read; report that explicitly.
            if not isinstance(result, dict):
                logger.error(f"Reformulation JSON is not an object: {response}")
                raise QueryReformulationError(
                    "Invalid JSON response from reformulation: expected a JSON object, "
                    f"got {type(result).__name__}"
                )

            reformulated = result.get("reformulated_query", current_query)

            # An empty or non-string value is useless as a search query;
            # keep the user's original wording instead.
            if not isinstance(reformulated, str) or not reformulated.strip():
                logger.warning(
                    "Reformulation returned no usable query; keeping the original query"
                )
                reformulated = current_query

            logger.info(f"Query reformulation: '{current_query}' → '{reformulated}'")
            return reformulated

        except QueryReformulationError:
            # Already logged and wrapped above; do not wrap a second time.
            raise
        except Exception as e:
            logger.error(f"Query reformulation failed: {e}")
            raise QueryReformulationError(f"Failed to reformulate query: {e}") from e
83 | 


--------------------------------------------------------------------------------
/docpixie/providers/base.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Base provider interface for vision AI operations
 3 | """
 4 | 
 5 | import base64
 6 | from abc import ABC, abstractmethod
 7 | from typing import List, Optional
 8 | from pathlib import Path
 9 | from dataclasses import dataclass
10 | import logging
11 | 
12 | from ..core.config import DocPixieConfig
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | 
@dataclass
class APIResult:
    """Result of a provider API call: the response text plus an optional cost."""
    # Text returned by the API call.
    text: str
    # Cost of the call; None when no cost was supplied.
    cost: Optional[float] = None
22 | 
23 | 
class BaseProvider(ABC):
    """Abstract base for AI vision providers.

    Holds the configuration and simple cost counters, declares the two
    message-processing entry points, and offers image helpers that concrete
    providers can share.
    """

    def __init__(self, config: DocPixieConfig):
        self.config = config
        # Cost counters start out empty; nothing in this base class updates them.
        self.total_cost: float = 0.0
        self.last_api_cost: Optional[float] = None

    @abstractmethod
    async def process_text_messages(
        self,
        messages: List[dict],
        max_tokens: int = 512,
        temperature: float = 0.3
    ) -> str:
        """Send text-only messages to the provider API and return its reply."""

    @abstractmethod
    async def process_multimodal_messages(
        self,
        messages: List[dict],
        max_tokens: int = 300,
        temperature: float = 0.3
    ) -> str:
        """Send messages mixing text and images to the provider API and return its reply."""

    def get_last_cost(self) -> Optional[float]:
        """Return the recorded cost of the most recent API call, or None."""
        return self.last_api_cost

    def get_total_cost(self) -> float:
        """Return the accumulated cost counter."""
        return self.total_cost

    def reset_cost_tracking(self):
        """Put both cost counters back to their initial values."""
        self.total_cost = 0.0
        self.last_api_cost = None

    # Helper methods for image handling (shared by all providers)

    def _encode_image(self, image_path: str) -> str:
        """Read the file at ``image_path`` and return its bytes as base64 text.

        Any failure is logged and then re-raised unchanged.
        """
        try:
            with open(image_path, 'rb') as handle:
                raw_bytes = handle.read()
                return base64.b64encode(raw_bytes).decode('utf-8')
        except Exception as e:
            logger.error(f"Failed to encode image {image_path}: {e}")
            raise

    def _create_image_data_url(self, image_path: str) -> str:
        """Return a base64 ``data:`` URL for the image.

        The media type is always declared as ``image/jpeg``, whatever the
        file's extension is.
        """
        payload = self._encode_image(image_path)
        return f"data:image/jpeg;base64,{payload}"

    def _validate_image_path(self, image_path: str) -> bool:
        """Return True when ``image_path`` exists and is a regular file."""
        candidate = Path(image_path)
        if not candidate.exists():
            return False
        return candidate.is_file()
86 | 
87 | 
class ProviderError(Exception):
    """Exception raised by provider operations.

    Attributes set on the instance:
        provider: Provider identifier passed by the raiser
        image_path: Path of the image involved, or None when not given
    """

    def __init__(self, message: str, provider: str, image_path: Optional[str] = None):
        # image_path defaults to None, so it is annotated Optional rather than
        # relying on an implicit Optional.
        self.provider = provider
        self.image_path = image_path
        super().__init__(message)
95 | 


--------------------------------------------------------------------------------
/docpixie/cli/task_display.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Task display management for DocPixie CLI
 3 | """
 4 | 
 5 | from typing import TYPE_CHECKING, Any
 6 | from .state_manager import AppStateManager
 7 | from .widgets import ChatArea
 8 | 
 9 | if TYPE_CHECKING:
10 |     from .app import DocPixieTUI
11 | 
12 | 
class TaskDisplayManager:
    """Renders task plan and task progress events into the chat area."""

    def __init__(self, app: 'DocPixieTUI', state_manager: AppStateManager):
        self.state_manager = state_manager
        self.app = app

    @staticmethod
    def _task_label(task) -> str:
        """Return ``task.name`` when the attribute exists, else ``str(task)``."""
        if hasattr(task, 'name'):
            return task.name
        return str(task)

    def display_task_update(self, event_type: str, data: Any) -> None:
        """Update the chat area and shared state for a single task event.

        Handles 'plan_created', 'plan_updated', 'task_started',
        'pages_selected' and 'task_completed'; other event types are ignored.
        For the plan events ``data`` is the plan itself, for the task events
        it is a mapping with a 'task' entry.
        """
        chat_log = self.app.query_one("#chat-log", ChatArea)
        state = self.state_manager

        if event_type == 'plan_created':
            # A brand-new plan replaces the old one and resets progress.
            state.current_plan = data
            state.completed_tasks.clear()
            chat_log.hide_processing_status(mark_done=True, final_text="Planning")
            chat_log.show_plan(data)
            return

        if event_type == 'plan_updated':
            state.current_plan = data
            finished = list(state.completed_tasks)
            chat_log.show_plan(data, is_update=True, completed_tasks=finished)
            return

        if event_type == 'task_started':
            task = data['task']
            label = self._task_label(task)
            document_name = self._get_document_name_for_task(task)
            # No page count is known yet at this point.
            chat_log.show_task_progress(label, None, document_name)
            return

        if event_type == 'pages_selected':
            task = data['task']
            selected = data.get('page_numbers', [])
            label = self._task_label(task)
            document_name = self._get_document_name_for_task(task)
            # Anything that is not a list or tuple counts as zero pages.
            if isinstance(selected, (list, tuple)):
                pages_count = len(selected)
            else:
                pages_count = 0
            chat_log.show_task_progress(label, pages_count, document_name)
            return

        if event_type == 'task_completed':
            label = self._task_label(data['task'])
            chat_log.update_task_status(label, done=True)
            state.completed_tasks.add(label)

            # Redraw the plan, if there is one, so the finished task shows up.
            if state.current_plan:
                chat_log.show_plan(
                    state.current_plan,
                    is_update=True,
                    completed_tasks=list(state.completed_tasks)
                )

    def _get_document_name_for_task(self, task) -> str:
        """Return the name of the indexed document the task refers to.

        Matches ``task.document`` against the ``id`` of the state manager's
        indexed documents. Returns 'document' when the task has no document
        ID, nothing matches, the match has no name, or any error occurs.
        """
        fallback = 'document'
        try:
            wanted_id = getattr(task, 'document', '')
            if not wanted_id:
                return fallback
            for candidate in self.state_manager.indexed_documents:
                if candidate.id != wanted_id:
                    continue
                # Only the first matching document is considered.
                if candidate and getattr(candidate, 'name', None):
                    return candidate.name
                return fallback
            return fallback
        except Exception:
            # Best-effort lookup: any failure falls back to the generic label.
            return fallback


--------------------------------------------------------------------------------
/docpixie/processors/factory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Processor factory for selecting appropriate document processor
  3 | """
  4 | 
  5 | from typing import Optional, Dict, Type
  6 | from pathlib import Path
  7 | import logging
  8 | 
  9 | from .base import BaseProcessor
 10 | from .pdf import PDFProcessor
 11 | from .image import ImageProcessor
 12 | from ..core.config import DocPixieConfig
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class ProcessorFactory:
 18 |     """Factory for creating document processors"""
 19 |     
 20 |     def __init__(self, config: DocPixieConfig):
 21 |         self.config = config
 22 |         self._processors: Dict[str, Type[BaseProcessor]] = {
 23 |             'pdf': PDFProcessor,
 24 |             'image': ImageProcessor
 25 |         }
 26 |         
 27 |         # Map file extensions to processor types
 28 |         self._extension_map: Dict[str, str] = {}
 29 |         self._build_extension_map()
 30 |     
 31 |     def _build_extension_map(self):
 32 |         """Build mapping from file extensions to processor types"""
 33 |         # Create processor instances to get supported extensions
 34 |         for processor_type, processor_class in self._processors.items():
 35 |             processor = processor_class(self.config)
 36 |             for ext in processor.get_supported_extensions():
 37 |                 self._extension_map[ext.lower()] = processor_type
 38 |         
 39 |         logger.debug(f"Built extension map: {self._extension_map}")
 40 |     
 41 |     def get_processor(self, file_path: str) -> BaseProcessor:
 42 |         """
 43 |         Get appropriate processor for file
 44 |         
 45 |         Args:
 46 |             file_path: Path to file
 47 |             
 48 |         Returns:
 49 |             Processor instance
 50 |             
 51 |         Raises:
 52 |             ValueError: If file type is not supported
 53 |         """
 54 |         file_extension = Path(file_path).suffix.lower()
 55 |         
 56 |         if not file_extension:
 57 |             raise ValueError(f"File has no extension: {file_path}")
 58 |         
 59 |         processor_type = self._extension_map.get(file_extension)
 60 |         
 61 |         if not processor_type:
 62 |             supported_exts = list(self._extension_map.keys())
 63 |             raise ValueError(
 64 |                 f"Unsupported file type '{file_extension}'. "
 65 |                 f"Supported extensions: {supported_exts}"
 66 |             )
 67 |         
 68 |         processor_class = self._processors[processor_type]
 69 |         processor = processor_class(self.config)
 70 |         
 71 |         logger.debug(f"Selected {processor_class.__name__} for {file_path}")
 72 |         return processor
 73 |     
 74 |     def supports_file(self, file_path: str) -> bool:
 75 |         """Check if file type is supported"""
 76 |         file_extension = Path(file_path).suffix.lower()
 77 |         return file_extension in self._extension_map
 78 |     
 79 |     def get_supported_extensions(self) -> Dict[str, str]:
 80 |         """Get all supported extensions and their processor types"""
 81 |         return self._extension_map.copy()
 82 |     
 83 |     def register_processor(self, processor_type: str, processor_class: Type[BaseProcessor]):
 84 |         """
 85 |         Register a custom processor
 86 |         
 87 |         Args:
 88 |             processor_type: Unique identifier for processor
 89 |             processor_class: Processor class
 90 |         """
 91 |         self._processors[processor_type] = processor_class
 92 |         
 93 |         # Update extension mapping
 94 |         processor = processor_class(self.config)
 95 |         for ext in processor.get_supported_extensions():
 96 |             self._extension_map[ext.lower()] = processor_type
 97 |         
 98 |         logger.info(f"Registered custom processor: {processor_type}")
 99 |     
100 |     def list_processors(self) -> Dict[str, Type[BaseProcessor]]:
101 |         """Get all registered processors"""
102 |         return self._processors.copy()
103 |     
104 |     def create_processor(self, processor_type: str) -> Optional[BaseProcessor]:
105 |         """
106 |         Create processor by type
107 |         
108 |         Args:
109 |             processor_type: Type of processor to create
110 |             
111 |         Returns:
112 |             Processor instance or None if type not found
113 |         """
114 |         processor_class = self._processors.get(processor_type)
115 |         if processor_class:
116 |             return processor_class(self.config)
117 |         return None


--------------------------------------------------------------------------------
/docpixie/ai/summarizer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Page summarizer for generating document summaries
  3 | """
  4 | 
  5 | import asyncio
  6 | from typing import List, Optional, Dict, Any
  7 | import logging
  8 | 
  9 | from ..providers.base import BaseProvider
 10 | from ..models.document import Document, Page
 11 | from ..core.config import DocPixieConfig
 12 | from .prompts import SYSTEM_VISION_EXPERT
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class PageSummarizer:
 18 |     """Generates summaries for document pages using vision models"""
 19 | 
 20 |     def __init__(self, config: DocPixieConfig, provider: Optional[BaseProvider] = None):
 21 |         self.config = config
 22 |         if provider:
 23 |             self.provider = provider
 24 |         else:
 25 |             from ..providers.factory import create_provider
 26 |             self.provider = create_provider(config)
 27 | 
 28 | 
 29 | 
 30 |     async def summarize_document(self, document: Document) -> Document:
 31 |         """
 32 |         Generate document summary from all page images
 33 | 
 34 |         Args:
 35 |             document: Document to summarize
 36 | 
 37 |         Returns:
 38 |             Document with document summary
 39 |         """
 40 |         logger.info(f"Summarizing document: {document.name}")
 41 | 
 42 |         # Always generate document summary from all page images
 43 |         document_summary = await self._generate_document_summary(document.pages, document.name)
 44 | 
 45 |         # Create updated document
 46 |         updated_document = Document(
 47 |             id=document.id,
 48 |             name=document.name,
 49 |             pages=document.pages,
 50 |             summary=document_summary,
 51 |             status=document.status,
 52 |             metadata={
 53 |                 **document.metadata,
 54 |                 'document_summary_generated': document_summary is not None,
 55 |                 'summary_model': self.config.provider
 56 |             },
 57 |             created_at=document.created_at
 58 |         )
 59 | 
 60 |         logger.info(f"Completed document summarization: {document.name}")
 61 |         return updated_document
 62 | 
 63 |     async def _generate_document_summary(self, pages: List[Page], document_name: str) -> Optional[str]:
 64 |         """Generate overall document summary using all page images in a single vision call"""
 65 |         try:
 66 |             # Get all page image paths
 67 |             image_paths = [page.image_path for page in pages if page.image_path]
 68 | 
 69 |             if not image_paths:
 70 |                 logger.warning("No page images available for document summary")
 71 |                 return None
 72 | 
 73 |             # Build messages for document summary
 74 |             messages = [
 75 |                 {
 76 |                     "role": "system",
 77 |                     "content": "You are a document analysis expert. Analyze all pages of this document and create a comprehensive summary that captures the overall content, main themes, key information, and purpose of the entire document. Consider how all pages work together to form a complete document."
 78 |                 },
 79 |                 {
 80 |                     "role": "user",
 81 |                     "content": [
 82 |                         {
 83 |                             "type": "text",
 84 |                             "text": f"Please analyze this complete document titled '{document_name}' and provide a comprehensive summary. Look at all pages together to understand the document's overall structure, main themes, key information, and purpose."
 85 |                         }
 86 |                     ]
 87 |                 }
 88 |             ]
 89 | 
 90 |             # Add all page images to the user message
 91 |             for image_path in image_paths:
 92 |                 messages[1]["content"].append({
 93 |                     "type": "image_path",
 94 |                     "image_path": image_path,
 95 |                     "detail": self.config.vision_detail
 96 |                 })
 97 | 
 98 |             # Generate document summary using provider
 99 |             summary = await self.provider.process_multimodal_messages(
100 |                 messages=messages,
101 |                 max_tokens=400,
102 |                 temperature=0.3
103 |             )
104 | 
105 |             logger.debug(f"Generated document summary: {summary[:50]}...")
106 |             return summary
107 | 
108 |         except Exception as e:
109 |             logger.error(f"Failed to generate document summary: {e}")
110 |             return None
111 | 
112 | 
113 | 
114 |     def get_summary_stats(self) -> Dict[str, Any]:
115 |         """Get summarizer statistics"""
116 |         return {
117 |             'provider': self.config.provider,
118 |             'model': self.config.vision_model
119 |         }
120 | 


--------------------------------------------------------------------------------
/docpixie/models/agent.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Agent models and data structures for DocPixie RAG Agent
  3 | """
  4 | 
  5 | import uuid
  6 | from dataclasses import dataclass, field
  7 | from typing import List, Optional
  8 | from enum import Enum
  9 | from datetime import datetime
 10 | 
 11 | from .document import Page
 12 | 
 13 | 
 14 | class TaskStatus(str, Enum):
 15 |     """Agent task status"""
 16 |     PENDING = "pending"
 17 |     IN_PROGRESS = "in_progress"
 18 |     COMPLETED = "completed"
 19 |     CANCELLED = "cancelled"
 20 | 
 21 | 
 22 | @dataclass
 23 | class ConversationMessage:
 24 |     """Represents a single conversation message"""
 25 |     role: str  # "user" or "assistant"
 26 |     content: str
 27 |     timestamp: datetime = field(default_factory=datetime.now)
 28 |     cost: float = 0.0  # Cost for this message (agent pipeline total for assistant messages)
 29 | 
 30 |     def __post_init__(self):
 31 |         """Validate message data"""
 32 |         if self.role not in ["system", "user", "assistant"]:
 33 |             raise ValueError("Role must be 'user' or 'assistant'")
 34 |         if not self.content.strip():
 35 |             raise ValueError("Content cannot be empty")
 36 | 
 37 | 
 38 | @dataclass
 39 | class AgentTask:
 40 |     """Represents a single task in the agent's plan"""
 41 |     id: str = field(default_factory=lambda: str(uuid.uuid4()))
 42 |     name: str = ""
 43 |     description: str = ""
 44 |     status: TaskStatus = TaskStatus.PENDING
 45 |     document: str = ""  # Single document ID assigned to this task
 46 | 
 47 |     def __post_init__(self):
 48 |         """Validate task data"""
 49 |         if not self.name.strip():
 50 |             raise ValueError("Task name cannot be empty")
 51 |         if not self.description.strip():
 52 |             raise ValueError("Task description cannot be empty")
 53 | 
 54 | 
 55 | @dataclass
 56 | class TaskPlan:
 57 |     """Represents the agent's current task plan"""
 58 |     initial_query: str
 59 |     tasks: List[AgentTask] = field(default_factory=list)
 60 |     current_iteration: int = 0
 61 | 
 62 |     def get_next_pending_task(self) -> Optional[AgentTask]:
 63 |         """Get the next task that needs to be executed"""
 64 |         return next((task for task in self.tasks if task.status == TaskStatus.PENDING), None)
 65 | 
 66 |     def has_pending_tasks(self) -> bool:
 67 |         """Check if there are any pending tasks"""
 68 |         return any(task.status == TaskStatus.PENDING for task in self.tasks)
 69 | 
 70 |     def mark_task_completed(self, task_id: str) -> bool:
 71 |         """Mark a task as completed"""
 72 |         task = next((t for t in self.tasks if t.id == task_id), None)
 73 |         if task:
 74 |             task.status = TaskStatus.COMPLETED
 75 |             return True
 76 |         return False
 77 | 
 78 |     def add_task(self, task: AgentTask):
 79 |         """Add a new task to the plan"""
 80 |         self.tasks.append(task)
 81 | 
 82 |     def remove_task(self, task_id: str) -> bool:
 83 |         """Remove a task from the plan"""
 84 |         original_length = len(self.tasks)
 85 |         self.tasks = [t for t in self.tasks if t.id != task_id]
 86 |         return len(self.tasks) < original_length
 87 | 
 88 |     def get_completed_tasks(self) -> List[AgentTask]:
 89 |         """Get all completed tasks"""
 90 |         return [task for task in self.tasks if task.status == TaskStatus.COMPLETED]
 91 | 
 92 | 
 93 | @dataclass
 94 | class TaskResult:
 95 |     """Represents the result of executing a single task"""
 96 |     task: AgentTask
 97 |     selected_pages: List[Page]
 98 |     analysis: str
 99 |     pages_analyzed: int = 0
100 | 
101 |     def __post_init__(self):
102 |         """Calculate pages analyzed"""
103 |         self.pages_analyzed = len(self.selected_pages)
104 | 
105 | 
@dataclass
class AgentQueryResult:
    """Final outcome of running a user query through the agent pipeline."""
    query: str
    answer: str
    selected_pages: List[Page]
    task_results: List[TaskResult] = field(default_factory=list)
    total_iterations: int = 0
    processing_time_seconds: float = 0.0
    total_cost: float = 0.0  # Total cost of all API calls for this query

    def get_unique_pages(self) -> List[Page]:
        """Return ``selected_pages`` with duplicate image paths removed.

        The first page seen for each ``image_path`` is kept, in original order.
        """
        # Dict insertion order keeps the first page seen for each path.
        first_by_path = {}
        for page in self.selected_pages:
            if page.image_path not in first_by_path:
                first_by_path[page.image_path] = page
        return list(first_by_path.values())

    def get_total_pages_analyzed(self) -> int:
        """Return the sum of ``pages_analyzed`` over all task results."""
        running_total = 0
        for result in self.task_results:
            running_total = running_total + result.pages_analyzed
        return running_total
132 | 


--------------------------------------------------------------------------------
/docpixie/cli/commands.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Command handling for DocPixie CLI
  3 | """
  4 | 
  5 | from typing import TYPE_CHECKING, Optional
  6 | from pathlib import Path
  7 | from docpixie import DocPixie
  8 | from .state_manager import AppStateManager
  9 | from .widgets import (
 10 |     ConversationManagerDialog, ModelSelectorDialog, DocumentManagerDialog,
 11 |     ChatArea
 12 | )
 13 | 
 14 | if TYPE_CHECKING:
 15 |     from .app import DocPixieTUI
 16 | 
 17 | 
 18 | class CommandHandler:
 19 |     """Handles all slash commands for the CLI application"""
 20 |     
 21 |     def __init__(self, app: 'DocPixieTUI', state_manager: AppStateManager):
 22 |         self.app = app
 23 |         self.state_manager = state_manager
 24 |     
 25 |     async def handle_command(self, command: str) -> None:
 26 |         """Handle slash commands"""
 27 |         chat_log = self.app.query_one("#chat-log", ChatArea)
 28 |         
 29 |         if command == "/exit":
 30 |             self.state_manager.save_current_conversation()
 31 |             self.app.exit()
 32 |         
 33 |         elif command == "/new":
 34 |             await self._handle_new_command(chat_log)
 35 |         
 36 |         elif command == "/clear":
 37 |             self._handle_clear_command(chat_log)
 38 |         
 39 |         elif command == "/save":
 40 |             self._handle_save_command(chat_log)
 41 |         
 42 |         elif command == "/conversations":
 43 |             await self._handle_conversations_command()
 44 |         
 45 |         elif command == "/model":
 46 |             await self._handle_model_command()
 47 |         
 48 |         elif command == "/documents":
 49 |             await self._handle_documents_command()
 50 |         
 51 |         elif command == "/help":
 52 |             self._handle_help_command(chat_log)
 53 |         
 54 |         else:
 55 |             chat_log.write(f"[warning]Unknown command: {command}[/warning]\n")
 56 |             chat_log.write("Type /help for available commands\n\n")
 57 |     
 58 |     async def _handle_new_command(self, chat_log: ChatArea) -> None:
 59 |         """Handle /new command"""
 60 |         self.state_manager.save_current_conversation()
 61 |         self.state_manager.create_new_conversation()
 62 |         self.state_manager.clear_task_plan()
 63 |         
 64 |         chat_log.clear()
 65 |         self.app.show_welcome_message()
 66 |         chat_log.write("[green bold]●[/green bold] Started new conversation\n\n")
 67 |         
 68 |         status_label = self.app.query_one("#status-label")
 69 |         status_label.update(self.state_manager.get_status_text())
 70 |     
 71 |     def _handle_clear_command(self, chat_log: ChatArea) -> None:
 72 |         """Handle /clear command"""
 73 |         self.state_manager.clear_task_plan()
 74 |         chat_log.clear()
 75 |         self.app.show_welcome_message()
 76 |     
 77 |     def _handle_save_command(self, chat_log: ChatArea) -> None:
 78 |         """Handle /save command"""
 79 |         if self.state_manager.current_conversation_id and self.state_manager.conversation_history:
 80 |             self.state_manager.save_current_conversation()
 81 |             chat_log.write("[green bold]●[/green bold] Conversation saved!\n\n")
 82 |         else:
 83 |             chat_log.write("[warning]No conversation to save[/warning]\n\n")
 84 |     
 85 |     async def _handle_conversations_command(self) -> None:
 86 |         """Handle /conversations command"""
 87 |         await self.app.push_screen(ConversationManagerDialog(
 88 |             self.state_manager.current_conversation_id
 89 |         ))
 90 |     
 91 |     async def _handle_model_command(self) -> None:
 92 |         """Handle /model command"""
 93 |         await self.app.push_screen(ModelSelectorDialog())
 94 |     
 95 |     async def _handle_documents_command(self) -> None:
 96 |         """Handle /documents command"""
 97 |         await self.app.push_screen(DocumentManagerDialog(
 98 |             self.state_manager.documents_folder,
 99 |             self.app.docpixie
100 |         ))
101 |     
102 |     def _handle_help_command(self, chat_log: ChatArea) -> None:
103 |         """Handle /help command"""
104 |         chat_log.write("\n[bold]Available Commands:[/bold]\n")
105 |         chat_log.write("  /new          - Start a new conversation (Ctrl+N)\n")
106 |         chat_log.write("  /conversations - Switch between conversations (Ctrl+L)\n")
107 |         chat_log.write("  /save         - Save current conversation\n")
108 |         chat_log.write("  /clear        - Clear the chat display\n")
109 |         chat_log.write("  /model        - Configure AI models (Ctrl+O)\n")
110 |         chat_log.write("  /documents    - Manage and index documents (Ctrl+D)\n")
111 |         chat_log.write("  /help         - Show this help message\n")
112 |         chat_log.write("  /exit         - Exit the program (Ctrl+Q)\n\n")
113 |         chat_log.write("[dim]Press Ctrl+/ to open command palette[/dim]\n\n")


--------------------------------------------------------------------------------
/docpixie/providers/openai.py:
--------------------------------------------------------------------------------
  1 | """
  2 | OpenAI GPT-4V provider for raw API operations
  3 | """
  4 | 
  5 | import logging
  6 | from typing import List, Dict, Any
  7 | 
  8 | from .base import BaseProvider, ProviderError
  9 | from ..core.config import DocPixieConfig
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | class OpenAIProvider(BaseProvider):
 15 |     """OpenAI GPT-4V provider for raw API operations"""
 16 |     
 17 |     def __init__(self, config: DocPixieConfig):
 18 |         super().__init__(config)
 19 |         
 20 |         if not config.openai_api_key:
 21 |             raise ValueError("OpenAI API key is required")
 22 |         
 23 |         # Import here to make it optional dependency
 24 |         try:
 25 |             from openai import AsyncOpenAI
 26 |             self.client = AsyncOpenAI(api_key=config.openai_api_key)
 27 |         except ImportError:
 28 |             raise ImportError("OpenAI library not found. Install with: pip install openai")
 29 |         
 30 |         self.model = config.vision_model
 31 |     
 32 |     async def process_text_messages(
 33 |         self, 
 34 |         messages: List[Dict[str, Any]], 
 35 |         max_tokens: int = 300, 
 36 |         temperature: float = 0.3
 37 |     ) -> str:
 38 |         """Process text-only messages through OpenAI API"""
 39 |         try:
 40 |             response = await self.client.chat.completions.create(
 41 |                 model=self.config.model,
 42 |                 messages=messages,
 43 |                 max_tokens=max_tokens,
 44 |                 temperature=temperature
 45 |             )
 46 |             
 47 |             result = response.choices[0].message.content.strip()
 48 |             logger.debug(f"OpenAI text response: {result[:50]}...")
 49 |             
 50 |             return result
 51 |             
 52 |         except Exception as e:
 53 |             logger.error(f"OpenAI text processing failed: {e}")
 54 |             raise ProviderError(f"Text processing failed: {e}", "openai")
 55 |     
 56 |     async def process_multimodal_messages(
 57 |         self, 
 58 |         messages: List[Dict[str, Any]], 
 59 |         max_tokens: int = 300, 
 60 |         temperature: float = 0.3
 61 |     ) -> str:
 62 |         """Process multimodal messages (text + images) through OpenAI Vision API"""
 63 |         try:
 64 |             # Process messages to convert image paths to data URLs
 65 |             processed_messages = self._prepare_openai_messages(messages)
 66 |             
 67 |             response = await self.client.chat.completions.create(
 68 |                 model=self.model,  # Use vision model
 69 |                 messages=processed_messages,
 70 |                 max_tokens=max_tokens,
 71 |                 temperature=temperature
 72 |             )
 73 |             
 74 |             result = response.choices[0].message.content.strip()
 75 |             logger.debug(f"OpenAI multimodal response: {result[:50]}...")
 76 |             
 77 |             return result
 78 |             
 79 |         except Exception as e:
 80 |             logger.error(f"OpenAI multimodal processing failed: {e}")
 81 |             raise ProviderError(f"Multimodal processing failed: {e}", "openai")
 82 |     
 83 |     def _prepare_openai_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 84 |         """Prepare messages for OpenAI API by converting image paths to data URLs"""
 85 |         processed_messages = []
 86 |         
 87 |         for message in messages:
 88 |             if message["role"] == "system":
 89 |                 # System messages are text-only
 90 |                 processed_messages.append(message)
 91 |             elif message["role"] == "user" and isinstance(message["content"], list):
 92 |                 # User message with multimodal content
 93 |                 processed_content = []
 94 |                 
 95 |                 for content_item in message["content"]:
 96 |                     if content_item["type"] == "text":
 97 |                         processed_content.append(content_item)
 98 |                     elif content_item["type"] == "image_path":
 99 |                         # Convert image path to OpenAI format
100 |                         image_path = content_item["image_path"]
101 |                         if self._validate_image_path(image_path):
102 |                             image_data_url = self._create_image_data_url(image_path)
103 |                             processed_content.append({
104 |                                 "type": "image_url",
105 |                                 "image_url": {
106 |                                     "url": image_data_url,
107 |                                     "detail": content_item.get("detail", "high")
108 |                                 }
109 |                             })
110 |                         else:
111 |                             logger.warning(f"Skipping invalid image path: {image_path}")
112 |                     else:
113 |                         # Pass through other content types
114 |                         processed_content.append(content_item)
115 |                 
116 |                 processed_messages.append({
117 |                     "role": message["role"],
118 |                     "content": processed_content
119 |                 })
120 |             else:
121 |                 # Regular text message
122 |                 processed_messages.append(message)
123 |         
124 |         return processed_messages


--------------------------------------------------------------------------------
/docpixie/cli/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Global configuration manager for DocPixie CLI
  3 | Handles API keys, model preferences, and user settings
  4 | """
  5 | 
  6 | import json
  7 | import os
  8 | from pathlib import Path
  9 | from typing import Optional, Dict, Any
 10 | from dataclasses import dataclass, asdict, field
 11 | 
 12 | 
# Model identifiers in "vendor/model" form for the planning (text) model.
# NOTE(review): presumably OpenRouter slugs, since CLIConfig stores only an
# OpenRouter API key — confirm against the model selector that reads this list.
PLANNING_MODELS = [
    "anthropic/claude-opus-4.1",
    "anthropic/claude-sonnet-4",
    "anthropic/claude-3.5-haiku",
    "google/gemini-2.5-flash",
    "google/gemini-2.5-pro",
    "openai/gpt-5",
    "openai/gpt-5-mini",
    "openai/gpt-4.1",
    "openai/gpt-4.1-mini",
    "qwen/qwen-max",
    "qwen/qwen-plus",
    "nousresearch/hermes-4-70b",
    "deepseek/deepseek-chat-v3.1",
    "mistralai/mistral-medium-3.1",
]

# Model identifiers in the same "vendor/model" form for the vision model.
VISION_MODELS = [
    "google/gemini-2.5-pro",
    "google/gemini-2.5-flash",
    "google/gemini-2.5-flash-lite",
    "openai/gpt-4.1",
    "openai/gpt-4.1-mini",
    "openai/gpt-4.1-nano",
    "anthropic/claude-sonnet-4",
]
 39 | 
 40 | 
 41 | @dataclass
 42 | class CLIConfig:
 43 |     """CLI configuration stored globally in ~/.docpixie/"""
 44 | 
 45 |     openrouter_api_key: Optional[str] = None
 46 | 
 47 |     text_model: str = "qwen/qwen-plus"
 48 |     vision_model: str = "google/gemini-2.5-flash"
 49 | 
 50 |     last_conversation_id: Optional[str] = None
 51 |     theme: str = "default"
 52 | 
 53 |     auto_index_on_startup: bool = True
 54 |     max_conversation_history: int = 20
 55 | 
 56 |     def to_dict(self) -> Dict[str, Any]:
 57 |         """Convert config to dictionary for JSON serialization"""
 58 |         return asdict(self)
 59 | 
 60 |     @classmethod
 61 |     def from_dict(cls, data: Dict[str, Any]) -> 'CLIConfig':
 62 |         """Create config from dictionary"""
 63 |         return cls(**data)
 64 | 
 65 | 
 66 | class ConfigManager:
 67 |     """Manages global DocPixie CLI configuration"""
 68 | 
 69 |     def __init__(self):
 70 |         """Initialize config manager with global config directory"""
 71 |         self.config_dir = Path.home() / ".docpixie"
 72 |         self.config_file = self.config_dir / "config.json"
 73 |         self.conversations_dir = self.config_dir / "conversations"
 74 | 
 75 |         self.config_dir.mkdir(exist_ok=True)
 76 |         self.conversations_dir.mkdir(exist_ok=True)
 77 | 
 78 |         self.config = self.load_config()
 79 | 
 80 |     def load_config(self) -> CLIConfig:
 81 |         """Load configuration from file or create default"""
 82 |         if self.config_file.exists():
 83 |             try:
 84 |                 with open(self.config_file, 'r') as f:
 85 |                     data = json.load(f)
 86 |                     return CLIConfig.from_dict(data)
 87 |             except Exception as e:
 88 |                 print(f"Warning: Failed to load config: {e}")
 89 |                 return CLIConfig()
 90 |         else:
 91 |             env_key = os.getenv("OPENROUTER_API_KEY")
 92 |             config = CLIConfig()
 93 |             if env_key:
 94 |                 config.openrouter_api_key = env_key
 95 |             return config
 96 | 
 97 |     def save_config(self):
 98 |         """Save current configuration to file"""
 99 |         try:
100 |             with open(self.config_file, 'w') as f:
101 |                 json.dump(self.config.to_dict(), f, indent=2)
102 |         except Exception as e:
103 |             print(f"Error saving config: {e}")
104 | 
105 |     def get_api_key(self) -> Optional[str]:
106 |         """Get OpenRouter API key from config or environment"""
107 |         if self.config.openrouter_api_key:
108 |             return self.config.openrouter_api_key
109 |         return os.getenv("OPENROUTER_API_KEY")
110 | 
111 |     def set_api_key(self, api_key: str):
112 |         """Set and save OpenRouter API key"""
113 |         self.config.openrouter_api_key = api_key
114 |         self.save_config()
115 | 
116 |     def has_api_key(self) -> bool:
117 |         """Check if API key is configured"""
118 |         return bool(self.get_api_key())
119 | 
120 |     def get_models(self) -> tuple[str, str]:
121 |         """Get configured models (text, vision)"""
122 |         return self.config.text_model, self.config.vision_model
123 | 
124 |     def set_models(self, text_model: str = None, vision_model: str = None):
125 |         """Update model configuration"""
126 |         if text_model:
127 |             self.config.text_model = text_model
128 |         if vision_model:
129 |             self.config.vision_model = vision_model
130 |         self.save_config()
131 | 
132 |     def get_conversation_path(self, conversation_id: str) -> Path:
133 |         """Get path for a specific conversation file"""
134 |         return self.conversations_dir / f"{conversation_id}.json"
135 | 
136 |     def get_all_conversations(self) -> list[Path]:
137 |         """Get all conversation files"""
138 |         return list(self.conversations_dir.glob("*.json"))
139 | 
140 |     def validate_api_key(self, api_key: str) -> bool:
141 |         """
142 |         Validate API key by making a test request
143 |         Returns True if valid, False otherwise
144 |         """
145 |         try:
146 |             if api_key and len(api_key) > 10:
147 |                 return True
148 |             return False
149 |         except Exception:
150 |             return False
151 | 
152 | 
# Module-level ConfigManager instance; stays None until get_config_manager()
# is called for the first time.
_config_manager: Optional[ConfigManager] = None


def get_config_manager() -> ConfigManager:
    """Get or create the global config manager instance.

    The manager is constructed on the first call and the same object is
    returned on every later call.
    """
    global _config_manager
    if _config_manager is None:
        _config_manager = ConfigManager()
    return _config_manager
162 | 


--------------------------------------------------------------------------------
/docpixie/models/document.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Document models and data structures for DocPixie
  3 | Simplified version of schemas from production DocPixie
  4 | """
  5 | 
  6 | from dataclasses import dataclass, field
  7 | from typing import List, Dict, Any, Optional
  8 | from enum import Enum
  9 | from pathlib import Path
 10 | import uuid
 11 | from datetime import datetime
 12 | 
 13 | 
class QueryMode(str, Enum):
    """Query processing modes.

    Mixes in ``str``, so members compare equal to their plain string values.
    """
    AUTO = "auto"    # Standard adaptive processing
 17 | 
 18 | 
class DocumentStatus(str, Enum):
    """Document processing status.

    Mixes in ``str``, so members compare equal to their plain string values.
    """
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
 25 | 
 26 | 
@dataclass
class Page:
    """Represents a single document page.

    Raises:
        ValueError: On construction, if ``page_number`` is not positive or
            ``image_path`` is empty.
    """
    page_number: int  # must be > 0, enforced in __post_init__
    image_path: str  # must be non-empty, enforced in __post_init__
    metadata: Dict[str, Any] = field(default_factory=dict)
    document_name: Optional[str] = None
    document_id: Optional[str] = None

    def __post_init__(self) -> None:
        """Validate page data"""
        if self.page_number <= 0:
            raise ValueError("Page number must be positive")
        if not self.image_path:
            raise ValueError("Image path is required")
 42 | 
 43 | 
 44 | @dataclass 
 45 | class Document:
 46 |     """Represents a processed document with pages"""
 47 |     id: str
 48 |     name: str
 49 |     pages: List[Page]
 50 |     summary: Optional[str] = None
 51 |     status: DocumentStatus = DocumentStatus.PENDING
 52 |     metadata: Dict[str, Any] = field(default_factory=dict)
 53 |     created_at: datetime = field(default_factory=datetime.now)
 54 |     
 55 |     def __post_init__(self):
 56 |         """Generate ID if not provided and validate data"""
 57 |         if not self.id:
 58 |             self.id = str(uuid.uuid4())
 59 |         if not self.name:
 60 |             raise ValueError("Document name is required")
 61 |         if not isinstance(self.pages, list):
 62 |             raise ValueError("Pages must be a list")
 63 |     
 64 |     @property
 65 |     def page_count(self) -> int:
 66 |         """Get total number of pages"""
 67 |         return len(self.pages)
 68 |     
 69 |     
 70 |     def get_page(self, page_number: int) -> Optional[Page]:
 71 |         """Get specific page by number"""
 72 |         for page in self.pages:
 73 |             if page.page_number == page_number:
 74 |                 return page
 75 |         return None
 76 |     
 77 |     def get_pages_range(self, start: int, end: int) -> List[Page]:
 78 |         """Get pages in a range"""
 79 |         return [p for p in self.pages if start <= p.page_number <= end]
 80 | 
 81 | 
 82 | @dataclass
 83 | class QueryResult:
 84 |     """Result of a RAG query"""
 85 |     query: str
 86 |     answer: str
 87 |     selected_pages: List[Page]
 88 |     mode: QueryMode
 89 |     confidence: float = 0.0
 90 |     processing_time: float = 0.0
 91 |     metadata: Dict[str, Any] = field(default_factory=dict)
 92 |     total_cost: float = 0.0  # Total cost of all API calls for this query
 93 |     
 94 |     def __post_init__(self):
 95 |         """Validate result data"""
 96 |         if not self.query:
 97 |             raise ValueError("Query is required")
 98 |         if not self.answer:
 99 |             raise ValueError("Answer is required")
100 |         if self.confidence < 0 or self.confidence > 1:
101 |             raise ValueError("Confidence must be between 0 and 1")
102 |     
103 |     @property
104 |     def page_count(self) -> int:
105 |         """Number of pages used for the answer"""
106 |         return len(self.selected_pages)
107 |     
108 |     @property
109 |     def page_numbers(self) -> List[int]:
110 |         """Page numbers used for the answer"""
111 |         return [p.page_number for p in self.selected_pages]
112 |     
113 |     def get_pages_by_document(self) -> Dict[str, List[int]]:
114 |         """Get pages grouped by document name"""
115 |         pages_by_doc = {}
116 |         for page in self.selected_pages:
117 |             doc_name = page.document_name or "Unknown Document"
118 |             if doc_name not in pages_by_doc:
119 |                 pages_by_doc[doc_name] = []
120 |             pages_by_doc[doc_name].append(page.page_number)
121 |         
122 |         # Sort page numbers within each document
123 |         for doc_name in pages_by_doc:
124 |             pages_by_doc[doc_name].sort()
125 |         
126 |         return pages_by_doc
127 | 
128 | 
@dataclass
class DocumentProcessRequest:
    """Request to process one document file.

    On construction the file must exist. A missing ``document_name``
    defaults to the file name without its suffix, and a missing
    ``document_id`` defaults to a new UUID4 string.

    Raises:
        FileNotFoundError: On construction, if ``file_path`` is empty or
            does not point to an existing path.
    """
    file_path: str
    document_id: Optional[str] = None
    document_name: Optional[str] = None

    def __post_init__(self):
        """Check that the file exists and fill in default name and id."""
        source = Path(self.file_path) if self.file_path else None
        if source is None or not source.exists():
            raise FileNotFoundError(f"File not found: {self.file_path}")

        self.document_name = self.document_name or source.stem
        self.document_id = self.document_id or str(uuid.uuid4())
146 | 
147 | 
@dataclass
class QueryRequest:
    """Request to query documents.

    Raises:
        ValueError: On construction, if ``query`` is empty or contains only
            whitespace.
    """
    query: str
    mode: QueryMode = QueryMode.AUTO
    document_ids: Optional[List[str]] = None
    max_pages: Optional[int] = None  # None is replaced by 15 in __post_init__
    stream: bool = False

    def __post_init__(self) -> None:
        """Validate query request and fill in the default page limit"""
        if not self.query.strip():
            raise ValueError("Query cannot be empty")

        # Set default max_pages
        if self.max_pages is None:
            self.max_pages = 15  # Use standard page limit


--------------------------------------------------------------------------------
/docpixie/ai/page_selector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Vision-based page selector for DocPixie RAG Agent
  3 | Selects relevant pages by analyzing page images directly with vision models
  4 | """
  5 | 
  6 | import json
  7 | import logging
  8 | from typing import List, Dict, Any, Optional
  9 | 
 10 | from ..models.document import Page
 11 | from ..providers.base import BaseProvider
 12 | from ..core.config import DocPixieConfig
 13 | from ..exceptions import PageSelectionError
 14 | from ..core.utils import sanitize_llm_json
 15 | from .prompts import SYSTEM_PAGE_SELECTOR, USER_VISION_ANALYSIS, VISION_PAGE_SELECTION_PROMPT
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | 
 20 | class VisionPageSelector:
 21 |     """
 22 |     Selects relevant document pages using vision model analysis
 23 |     Key feature: Analyzes actual page IMAGES, not text summaries
 24 |     """
 25 | 
 26 |     def __init__(self, provider: BaseProvider, config: DocPixieConfig):
 27 |         self.provider = provider
 28 |         self.config = config
 29 | 
 30 |     async def select_pages_for_task(
 31 |         self,
 32 |         query: str,
 33 |         query_description: str,
 34 |         task_pages: List[Page]
 35 |     ) -> List[Page]:
 36 |         """
 37 |         Select most relevant pages by analyzing page IMAGES with vision model
 38 | 
 39 |         Args:
 40 |             query: The question/task to find pages for
 41 |             task_pages: Pages from the task's assigned document
 42 | 
 43 |         Returns:
 44 |             List of selected pages, ordered by relevance
 45 | 
 46 |         Raises:
 47 |             PageSelectionError: If page selection fails
 48 |         """
 49 |         if not task_pages:
 50 |             logger.warning("No pages provided for selection")
 51 |             return []
 52 | 
 53 |         try:
 54 |             logger.info(f"Selecting most relevant pages from {len(task_pages)} task pages")
 55 | 
 56 |             # Build vision-based selection message
 57 |             messages = self._build_vision_selection_messages(query, query_description, task_pages)
 58 | 
 59 |             # Use vision model to analyze page images and select best ones
 60 |             result = await self.provider.process_multimodal_messages(
 61 |                 messages=messages,
 62 |                 max_tokens=200,
 63 |                 temperature=0.1  # Low temperature for consistent selection
 64 |             )
 65 | 
 66 |             # Parse selection result
 67 |             selected_pages = self._parse_page_selection(result, task_pages)
 68 | 
 69 |             logger.info(f"Successfully selected {len(selected_pages)} pages")
 70 |             return selected_pages
 71 | 
 72 |         except Exception as e:
 73 |             logger.error(f"Vision page selection failed: {e}")
 74 |             raise PageSelectionError(f"Failed to select pages for task: {e}")
 75 | 
 76 |     def _build_vision_selection_messages(
 77 |         self,
 78 |         query: str,
 79 |         query_description: str,
 80 |         all_pages: List[Page]
 81 |     ) -> List[Dict[str, Any]]:
 82 |         """
 83 |         Build multimodal message with all page images for vision analysis
 84 |         This is the key method that makes our system vision-first
 85 |         """
 86 |         messages = [
 87 |             {
 88 |                 "role": "system",
 89 |                 "content": SYSTEM_PAGE_SELECTOR
 90 |             }
 91 |         ]
 92 |         user_content = []
 93 |         # Add ALL page images to the message for vision analysis
 94 |         for i, page in enumerate(all_pages, 1):
 95 |             user_content.extend([
 96 |                 {
 97 |                     "type": "image_path",
 98 |                     "image_path": page.image_path,
 99 |                     "detail": self.config.vision_detail
100 |                 },
101 |                 {
102 |                     "type": "text",
103 |                     "text": f"[Page {i}]"
104 |                 }
105 |             ])
106 | 
107 |         user_content.append(
108 |             {
109 |                 "type": "text",
110 |                 "text": VISION_PAGE_SELECTION_PROMPT.format(query=query, query_description=query_description)
111 |             }
112 |         )
113 | 
114 |         messages.append(
115 |             {
116 |                 "role": "user",
117 |                 "content": user_content
118 |             }
119 |         )
120 | 
121 |         return messages
122 | 
123 |     def _parse_page_selection(
124 |         self,
125 |         result: str,
126 |         all_pages: List[Page]
127 |     ) -> List[Page]:
128 |         """
129 |         Parse the vision model's page selection response
130 |         """
131 |         try:
132 |             # Parse JSON response
133 |             selection_data = json.loads(sanitize_llm_json(result))
134 |             selected_indices = selection_data.get("selected_pages", [])
135 | 
136 |             selected_pages = []
137 |             for idx in selected_indices:
138 |                 if isinstance(idx, int) and 1 <= idx <= len(all_pages):
139 |                     page = all_pages[idx - 1]
140 |                     selected_pages.append(page)
141 |                     logger.debug(f"Selected page {idx}: {page.image_path}")
142 | 
143 |             # If no valid pages were selected, return empty list and raise error
144 |             if not selected_pages:
145 |                 logger.error("No valid pages selected by vision model")
146 |                 raise PageSelectionError("Vision model failed to select any valid pages")
147 | 
148 |             return selected_pages
149 | 
150 |         except (json.JSONDecodeError, KeyError, TypeError) as e:
151 |             logger.error(f"Failed to parse page selection JSON: {e}")
152 |             logger.debug(f"Raw vision model response: {result}")
153 | 
154 |             raise PageSelectionError(f"Failed to parse vision model page selection response: {e}, raw response: \n{result}")
155 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # DocPixie
  2 | 
  3 | A lightweight multimodal RAG (Retrieval-Augmented Generation) library that uses vision AI instead of traditional embeddings or vector databases. DocPixie processes documents as images and uses vision language models for both document understanding and intelligent page selection.
  4 | 
  5 | ![DocPixie Demo](screenshot.png)
  6 | 
  7 | ## 🌟 Features
  8 | 
  9 | - **Vision-First Approach**: Documents processed as images using PyMuPDF, preserving visual information and formatting
 10 | - **No Vector Database Required**: Eliminates the complexity of embeddings and vector storage
 11 | - **Adaptive RAG Agent**: Single intelligent agent that dynamically plans tasks and selects relevant pages
 12 | - **Multi-Provider Support**: Works with OpenAI GPT-4V, Anthropic Claude, and OpenRouter
 13 | - **Modern CLI Interface**: Beautiful terminal UI built with Textual
 14 | - **Conversation Aware**: Maintains context across multiple queries
 15 | - **Pluggable Storage**: Local filesystem or in-memory storage backends
 16 | 
 17 | ## 🚀 Quick Start
 18 | 
 19 | ### Installation
 20 | 
 21 | ```bash
 22 | # use uv (recommended)
 23 | uv pip install docpixie
 24 | 
 25 | # or pip
 26 | pip install docpixie
 27 | ```
 28 | 
 29 | Try the CLI:
 30 | ```bash
 31 | docpixie
 32 | ```
 33 | 
 34 | ### Basic Usage
 35 | 
 36 | ```python
 37 | import asyncio
 38 | from docpixie import DocPixie
 39 | 
 40 | async def main():
 41 |     # Initialize with your API key
 42 |     docpixie = DocPixie()
 43 | 
 44 |     # Add a document
 45 |     document = await docpixie.add_document("path/to/your/document.pdf")
 46 |     print(f"Added document: {document.name}")
 47 | 
 48 |     # Query the document
 49 |     result = await docpixie.query("What are the key findings?")
 50 |     print(f"Answer: {result.answer}")
 51 |     print(f"Pages used: {result.page_numbers}")
 52 | 
 53 | # Run the example
 54 | asyncio.run(main())
 55 | ```
 56 | 
 57 | ### Using the CLI
 58 | 
 59 | Start the interactive terminal interface:
 60 | 
 61 | ```bash
 62 | docpixie
 63 | ```
 64 | 
 65 | The CLI provides:
 66 | - Interactive document chat
 67 | - Document management
 68 | - Conversation history
 69 | - Model configuration
 70 | - Command palette with shortcuts
 71 | 
 72 | ## 🛠️ Configuration
 73 | 
 74 | DocPixie uses environment variables for API key configuration:
 75 | 
 76 | ```bash
 77 | # For OpenAI (default)
 78 | export OPENAI_API_KEY="your-openai-key"
 79 | 
 80 | # For Anthropic Claude
 81 | export ANTHROPIC_API_KEY="your-anthropic-key"
 82 | 
 83 | # For OpenRouter (supports many models)
 84 | export OPENROUTER_API_KEY="your-openrouter-key"
 85 | ```
 86 | 
 87 | You can also specify the provider:
 88 | 
 89 | ```python
 90 | from docpixie import DocPixie, DocPixieConfig
 91 | 
 92 | config = DocPixieConfig(
 93 |     provider="anthropic",  # or "openai", "openrouter"
 94 |     model="claude-3-opus-20240229",
 95 |     vision_model="claude-3-opus-20240229"
 96 | )
 97 | 
 98 | docpixie = DocPixie(config=config)
 99 | ```
100 | 
101 | ## 📚 Supported File Types
102 | 
103 | - **PDF files** (.pdf) - Full multipage support
104 | - More file types coming soon
105 | 
106 | ## 🏗️ Architecture
107 | 
108 | DocPixie uses a clean, modular architecture:
109 | 
110 | ```
111 | 📁 Core Components
112 | ├── 🧠 Adaptive RAG Agent - Dynamic task planning and execution
113 | ├── 👁️  Vision Processing - Document-to-image conversion via PyMuPDF
114 | ├── 🔌 Provider System - Unified interface for AI providers
115 | ├── 💾 Storage Backends - Local filesystem or in-memory storage
116 | └── 🖥️  CLI Interface - Modern terminal UI with Textual
117 | 
118 | 📁 Processing Flow
119 | 1. Document → Images (PyMuPDF)
120 | 2. Vision-based summarization
121 | 3. Adaptive query processing
122 | 4. Intelligent page selection
123 | 5. Response synthesis
124 | ```
125 | 
126 | ### Key Design Principles
127 | 
128 | - **Provider-Agnostic**: Generic model configuration works across all providers
129 | - **Image-Based Processing**: All documents converted to images, preserving visual context
130 | - **Business Logic Separation**: Raw API operations separate from workflow logic
131 | - **Adaptive Intelligence**: Single agent mode that dynamically adjusts based on findings
132 | 
133 | ## 🎯 Use Cases
134 | 
135 | - **Research & Analysis**: Query academic papers, reports, and research documents
136 | - **Document Q&A**: Interactive questioning of PDFs, contracts, and manuals
137 | - **Content Discovery**: Find specific information across large document collections
138 | - **Visual Document Processing**: Handle documents with charts, diagrams, and complex layouts
139 | 
140 | ## 🌍 Environment Variables
141 | 
142 | | Variable | Description | Default |
143 | |----------|-------------|---------|
144 | | `OPENAI_API_KEY` | OpenAI API key | None |
145 | | `ANTHROPIC_API_KEY` | Anthropic API key | None |
146 | | `OPENROUTER_API_KEY` | OpenRouter API key | None |
147 | | `DOCPIXIE_PROVIDER` | AI provider | `openai` |
148 | | `DOCPIXIE_STORAGE_PATH` | Storage directory | `./docpixie_data` |
149 | | `DOCPIXIE_JPEG_QUALITY` | Image quality (1-100) | `90` |
150 | 
151 | ## 📖 Documentation
152 | 
153 | - [Getting Started Guide](docs/getting-started.md) - Detailed examples and tutorials
154 | - [CLI Tool Guide](docs/cli-tool.md) - Complete CLI documentation
155 | 
156 | ## 🤝 Contributing
157 | 
158 | 1. Fork the repository
159 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
160 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
161 | 4. Push to the branch (`git push origin feature/amazing-feature`)
162 | 5. Open a Pull Request
163 | 
164 | ## 📄 License
165 | 
166 | This project is licensed under the MIT License - see the LICENSE file for details.
167 | 
168 | ## 🙏 Acknowledgments
169 | 
170 | - Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF processing
171 | - CLI powered by [Textual](https://textual.textualize.io/)
172 | - Supports OpenAI, Anthropic, and OpenRouter APIs
173 | 
174 | ---
175 | 


--------------------------------------------------------------------------------
/docpixie/storage/base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Base storage interface for documents
  3 | """
  4 | 
  5 | from abc import ABC, abstractmethod
  6 | from typing import List, Optional, Dict, Any
  7 | import logging
  8 | 
  9 | from ..models.document import Document, Page
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | class BaseStorage(ABC):
 15 |     """Base class for storage backends"""
 16 |     
 17 |     @abstractmethod
 18 |     async def save_document(self, document: Document) -> str:
 19 |         """
 20 |         Save a processed document
 21 |         
 22 |         Args:
 23 |             document: Document to save
 24 |             
 25 |         Returns:
 26 |             Document ID
 27 |         """
 28 |         pass
 29 |     
 30 |     @abstractmethod
 31 |     async def get_document(self, document_id: str) -> Optional[Document]:
 32 |         """
 33 |         Retrieve a document by ID
 34 |         
 35 |         Args:
 36 |             document_id: ID of document to retrieve
 37 |             
 38 |         Returns:
 39 |             Document or None if not found
 40 |         """
 41 |         pass
 42 |     
 43 |     @abstractmethod
 44 |     async def list_documents(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
 45 |         """
 46 |         List all documents with metadata
 47 |         
 48 |         Args:
 49 |             limit: Maximum number of documents to return
 50 |             
 51 |         Returns:
 52 |             List of document metadata dicts
 53 |         """
 54 |         pass
 55 |     
 56 |     @abstractmethod
 57 |     async def delete_document(self, document_id: str) -> bool:
 58 |         """
 59 |         Delete a document and its associated files
 60 |         
 61 |         Args:
 62 |             document_id: ID of document to delete
 63 |             
 64 |         Returns:
 65 |             True if deletion was successful
 66 |         """
 67 |         pass
 68 |     
 69 |     @abstractmethod
 70 |     async def document_exists(self, document_id: str) -> bool:
 71 |         """
 72 |         Check if document exists
 73 |         
 74 |         Args:
 75 |             document_id: Document ID to check
 76 |             
 77 |         Returns:
 78 |             True if document exists
 79 |         """
 80 |         pass
 81 |     
 82 |     @abstractmethod
 83 |     async def get_document_summary(self, document_id: str) -> Optional[str]:
 84 |         """
 85 |         Get document summary without loading full document
 86 |         
 87 |         Args:
 88 |             document_id: Document ID
 89 |             
 90 |         Returns:
 91 |             Document summary or None
 92 |         """
 93 |         pass
 94 |     
 95 |     @abstractmethod
 96 |     async def update_document_summary(self, document_id: str, summary: str) -> bool:
 97 |         """
 98 |         Update document summary
 99 |         
100 |         Args:
101 |             document_id: Document ID
102 |             summary: New summary text
103 |             
104 |         Returns:
105 |             True if update was successful
106 |         """
107 |         pass
108 |     
109 |     @abstractmethod
110 |     async def get_all_documents(self) -> List[Document]:
111 |         """
112 |         Get all documents for agent processing
113 |         
114 |         Returns:
115 |             List of all documents in storage
116 |         """
117 |         pass
118 |     
119 |     @abstractmethod
120 |     async def get_all_pages(self) -> List[Page]:
121 |         """
122 |         Get all pages from all documents for agent processing
123 |         
124 |         Returns:
125 |             List of all pages across all documents
126 |         """
127 |         pass
128 |     
129 |     async def get_documents_by_ids(self, document_ids: List[str]) -> List[Document]:
130 |         """
131 |         Get multiple documents by IDs
132 |         
133 |         Args:
134 |             document_ids: List of document IDs
135 |             
136 |         Returns:
137 |             List of documents (may be fewer than requested if some not found)
138 |         """
139 |         documents = []
140 |         for doc_id in document_ids:
141 |             doc = await self.get_document(doc_id)
142 |             if doc:
143 |                 documents.append(doc)
144 |         return documents
145 |     
146 |     async def search_documents(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
147 |         """
148 |         Simple text search in document names and summaries
149 |         Default implementation - subclasses can override for better search
150 |         
151 |         Args:
152 |             query: Search query
153 |             limit: Maximum results
154 |             
155 |         Returns:
156 |             List of matching document metadata
157 |         """
158 |         all_docs = await self.list_documents()
159 |         matching_docs = []
160 |         query_lower = query.lower()
161 |         
162 |         for doc_meta in all_docs:
163 |             name_match = query_lower in doc_meta.get('name', '').lower()
164 |             summary_match = query_lower in doc_meta.get('summary', '').lower()
165 |             
166 |             if name_match or summary_match:
167 |                 matching_docs.append(doc_meta)
168 |             
169 |             if len(matching_docs) >= limit:
170 |                 break
171 |         
172 |         return matching_docs
173 |     
174 |     def get_storage_stats(self) -> Dict[str, Any]:
175 |         """
176 |         Get storage statistics
177 |         Default implementation - subclasses can override
178 |         
179 |         Returns:
180 |             Dictionary with storage statistics
181 |         """
182 |         return {
183 |             'backend': self.__class__.__name__,
184 |             'features': ['basic_storage']
185 |         }
186 | 
187 | 
class StorageError(Exception):
    """Error type raised by storage operations"""

    def __init__(self, message: str, document_id: Optional[str] = None):
        # The message becomes the exception's sole positional argument
        super().__init__(message)
        # ID of the document the failure relates to, when one was supplied
        self.document_id = document_id


--------------------------------------------------------------------------------
/docpixie/ai/synthesizer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Response synthesizer for DocPixie RAG Agent
  3 | Combines multiple task results into coherent final answers
  4 | """
  5 | 
  6 | import logging
  7 | from typing import List
  8 | 
  9 | from ..models.agent import TaskResult
 10 | from ..providers.base import BaseProvider
 11 | from .prompts import SYNTHESIS_PROMPT, SYSTEM_SYNTHESIS
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | class ResponseSynthesizer:
 17 |     """
 18 |     Synthesizes multiple task results into a comprehensive final response
 19 |     Key feature: Combines findings from different tasks into coherent narrative
 20 |     """
 21 | 
 22 |     def __init__(self, provider: BaseProvider):
 23 |         self.provider = provider
 24 | 
 25 |     async def synthesize_response(
 26 |         self,
 27 |         original_query: str,
 28 |         task_results: List[TaskResult]
 29 |     ) -> str:
 30 |         """
 31 |         Synthesize multiple task results into a final comprehensive response
 32 | 
 33 |         Args:
 34 |             original_query: The user's original question
 35 |             task_results: List of completed task results to combine
 36 | 
 37 |         Returns:
 38 |             Synthesized response that addresses the original query
 39 |         """
 40 |         if not task_results:
 41 |             logger.warning("No task results provided for synthesis")
 42 |             return "I couldn't find any relevant information to answer your query."
 43 | 
 44 |         try:
 45 |             logger.info(f"Synthesizing response from {len(task_results)} task results")
 46 | 
 47 |             # Build results text from all task findings
 48 |             results_text = self._build_results_text(task_results)
 49 | 
 50 |             # Generate synthesis prompt
 51 |             prompt = SYNTHESIS_PROMPT.format(
 52 |                 original_query=original_query,
 53 |                 results_text=results_text
 54 |             )
 55 | 
 56 |             messages = [
 57 |                 {"role": "system", "content": SYSTEM_SYNTHESIS},
 58 |                 {"role": "user", "content": prompt}
 59 |             ]
 60 | 
 61 |             # Get synthesized response
 62 |             result = await self.provider.process_text_messages(
 63 |                 messages=messages,
 64 |                 max_tokens=2048,  # Longer response for synthesis
 65 |                 temperature=0.2  # Low temperature for consistent synthesis
 66 |             )
 67 | 
 68 |             logger.info("Successfully synthesized final response")
 69 |             return result.strip()
 70 | 
 71 |         except Exception as e:
 72 |             logger.error(f"Failed to synthesize response: {e}")
 73 |             # Fallback: return basic combination of results
 74 |             return self._create_fallback_response(original_query, task_results)
 75 | 
 76 |     def _build_results_text(self, task_results: List[TaskResult]) -> str:
 77 |         """Build formatted text from all task results"""
 78 |         results_sections = []
 79 | 
 80 |         for i, result in enumerate(task_results, 1):
 81 |             section = f"""TASK {i}: {result.task.name}
 82 | Description: {result.task.description}
 83 | Analysis: {result.analysis}
 84 | 
 85 | ---"""
 86 |             results_sections.append(section)
 87 | 
 88 |         return "\n".join(results_sections)
 89 | 
 90 |     def _create_fallback_response(
 91 |         self,
 92 |         original_query: str,
 93 |         task_results: List[TaskResult]
 94 |     ) -> str:
 95 |         """Create a simple fallback response if synthesis fails"""
 96 |         logger.warning("Using fallback response synthesis")
 97 | 
 98 |         response_parts = [
 99 |             f"Based on my analysis of the documents, here's what I found regarding your query: {original_query}\n"
100 |         ]
101 | 
102 |         for i, result in enumerate(task_results, 1):
103 |             response_parts.append(f"**{result.task.name}:**")
104 |             response_parts.append(result.analysis)
105 | 
106 |             if i < len(task_results):
107 |                 response_parts.append("")  # Add blank line between results
108 | 
109 |         return "\n".join(response_parts)
110 | 
111 |     async def synthesize_single_result(
112 |         self,
113 |         original_query: str,
114 |         task_result: TaskResult
115 |     ) -> str:
116 |         """
117 |         Handle synthesis for single task result (simpler case)
118 | 
119 |         Args:
120 |             original_query: The user's original question
121 |             task_result: Single task result to present
122 | 
123 |         Returns:
124 |             Formatted response for single task
125 |         """
126 |         try:
127 |             # For single results, we can often just clean up the analysis
128 |             # But still use synthesis prompt for consistency
129 |             return await self.synthesize_response(original_query, [task_result])
130 | 
131 |         except Exception as e:
132 |             logger.error(f"Failed to synthesize single result: {e}")
133 | 
134 |             # Simple fallback for single result
135 |             response = f"Based on my analysis, here's what I found regarding your query:\n\n"
136 |             response += f"**{task_result.task.name}**\n{task_result.analysis}"
137 | 
138 |             return response
139 | 
140 |     def validate_synthesis_quality(self, synthesized_response: str) -> bool:
141 |         """
142 |         Basic validation of synthesis quality
143 | 
144 |         Args:
145 |             synthesized_response: The synthesized response to validate
146 | 
147 |         Returns:
148 |             True if response meets basic quality criteria
149 |         """
150 |         if not synthesized_response or not synthesized_response.strip():
151 |             return False
152 | 
153 |         # Check minimum length (synthesis should be substantial)
154 |         if len(synthesized_response.strip()) < 50:
155 |             return False
156 | 
157 |         # Check it doesn't just repeat the prompt
158 |         if "SYNTHESIS_PROMPT" in synthesized_response:
159 |             return False
160 | 
161 |         # Check for basic structure indicators
162 |         if "I couldn't find" in synthesized_response and len(synthesized_response) < 100:
163 |             return False
164 | 
165 |         return True
166 | 


--------------------------------------------------------------------------------
/docpixie/providers/openrouter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | OpenRouter provider for raw API operations
  3 | Uses OpenAI client with OpenRouter's API endpoint
  4 | """
  5 | 
  6 | import logging
  7 | from typing import List, Dict, Any
  8 | 
  9 | from .base import BaseProvider, ProviderError
 10 | from ..core.config import DocPixieConfig
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | class OpenRouterProvider(BaseProvider):
 16 |     """OpenRouter provider for raw API operations"""
 17 | 
 18 |     def __init__(self, config: DocPixieConfig):
 19 |         super().__init__(config)
 20 | 
 21 |         if not config.openrouter_api_key:
 22 |             raise ValueError("OpenRouter API key is required")
 23 | 
 24 |         # Import here to make it optional dependency
 25 |         try:
 26 |             from openai import AsyncOpenAI
 27 |             self.client = AsyncOpenAI(
 28 |                 api_key=config.openrouter_api_key,
 29 |                 base_url="https://openrouter.ai/api/v1"
 30 |             )
 31 |         except ImportError:
 32 |             raise ImportError("OpenAI library not found. Install with: pip install openai")
 33 | 
 34 |         self.model = config.vision_model
 35 | 
 36 |     async def process_text_messages(
 37 |         self,
 38 |         messages: List[Dict[str, Any]],
 39 |         max_tokens: int = 300,
 40 |         temperature: float = 0.3
 41 |     ) -> str:
 42 |         """Process text-only messages through OpenRouter API"""
 43 |         try:
 44 |             response = await self.client.chat.completions.create(
 45 |                 model=self.config.model,
 46 |                 messages=messages,
 47 |                 max_tokens=max_tokens,
 48 |                 temperature=temperature,
 49 |                 extra_body= {
 50 |                       "usage": {
 51 |                         "include": True,
 52 |                       },
 53 |                 },
 54 |             )
 55 | 
 56 |             result = response.choices[0].message.content.strip()
 57 |             logger.debug(f"OpenRouter text response: {result[:50]}...")
 58 | 
 59 |             # Track cost if available
 60 |             if hasattr(response, 'usage') and hasattr(response.usage, 'cost'):
 61 |                 self.last_api_cost = response.usage.cost
 62 |                 self.total_cost += response.usage.cost
 63 |                 logger.debug(f"OpenRouter API cost: ${response.usage.cost}")
 64 |             else:
 65 |                 self.last_api_cost = None
 66 | 
 67 |             return result
 68 | 
 69 |         except Exception as e:
 70 |             logger.error(f"OpenRouter text processing failed: {e}")
 71 |             raise ProviderError(f"Text processing failed: {e}", "openrouter")
 72 | 
 73 |     async def process_multimodal_messages(
 74 |         self,
 75 |         messages: List[Dict[str, Any]],
 76 |         max_tokens: int = 300,
 77 |         temperature: float = 0.3
 78 |     ) -> str:
 79 |         """Process multimodal messages (text + images) through OpenRouter API"""
 80 |         try:
 81 |             # Process messages to convert image paths to data URLs
 82 |             processed_messages = self._prepare_openai_messages(messages)
 83 | 
 84 |             response = await self.client.chat.completions.create(
 85 |                 model=self.model,  # Use vision model
 86 |                 messages=processed_messages,
 87 |                 max_tokens=max_tokens,
 88 |                 temperature=temperature,
 89 |                 extra_body= {
 90 |                       "usage": {
 91 |                         "include": True,
 92 |                       },
 93 |                 },
 94 |             )
 95 | 
 96 |             result = response.choices[0].message.content.strip()
 97 |             logger.debug(f"OpenRouter multimodal response: {result[:50]}...")
 98 | 
 99 |             # Track cost if available
100 |             if hasattr(response, 'usage') and hasattr(response.usage, 'cost'):
101 |                 self.last_api_cost = response.usage.cost
102 |                 self.total_cost += response.usage.cost
103 |                 logger.debug(f"OpenRouter API cost: ${response.usage.cost}")
104 |             else:
105 |                 self.last_api_cost = None
106 | 
107 |             return result
108 | 
109 |         except Exception as e:
110 |             logger.error(f"OpenRouter multimodal processing failed: {e}")
111 |             raise ProviderError(f"Multimodal processing failed: {e}", "openrouter")
112 | 
113 |     def _prepare_openai_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
114 |         """Prepare messages for OpenRouter API by converting image paths to data URLs"""
115 |         processed_messages = []
116 | 
117 |         for message in messages:
118 |             if message["role"] == "system":
119 |                 # System messages are text-only
120 |                 processed_messages.append(message)
121 |             elif message["role"] == "user" and isinstance(message["content"], list):
122 |                 # User message with multimodal content
123 |                 processed_content = []
124 | 
125 |                 for content_item in message["content"]:
126 |                     if content_item["type"] == "text":
127 |                         processed_content.append(content_item)
128 |                     elif content_item["type"] == "image_path":
129 |                         # Convert image path to OpenRouter format (same as OpenAI)
130 |                         image_path = content_item["image_path"]
131 |                         if self._validate_image_path(image_path):
132 |                             image_data_url = self._create_image_data_url(image_path)
133 |                             processed_content.append({
134 |                                 "type": "image_url",
135 |                                 "image_url": {
136 |                                     "url": image_data_url,
137 |                                     "detail": content_item.get("detail", "high")
138 |                                 }
139 |                             })
140 |                         else:
141 |                             logger.warning(f"Skipping invalid image path: {image_path}")
142 |                     else:
143 |                         # Pass through other content types
144 |                         processed_content.append(content_item)
145 | 
146 |                 processed_messages.append({
147 |                     "role": message["role"],
148 |                     "content": processed_content
149 |                 })
150 |             else:
151 |                 # Regular text message
152 |                 processed_messages.append(message)
153 | 
154 |         return processed_messages
155 | 


--------------------------------------------------------------------------------
/docpixie/ai/context_processor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Context Processor - Handles conversation history summarization and context building
  3 | """
  4 | 
  5 | import logging
  6 | from typing import List, Tuple, Optional
  7 | 
  8 | from ..models.agent import ConversationMessage
  9 | from ..providers.base import BaseProvider
 10 | from ..core.config import DocPixieConfig
 11 | from ..exceptions import ContextProcessingError
 12 | from .prompts import CONVERSATION_SUMMARIZATION_PROMPT
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class ContextProcessor:
 18 |     """
 19 |     Processes conversation history to create optimized context for RAG
 20 | 
 21 |     When conversation exceeds max_turns:
 22 |     - Summarizes first turns_to_summarize turns
 23 |     - Includes last turns_to_keep_full turns in full
 24 |     - Creates condensed context for query reformulation
 25 |     """
 26 | 
 27 |     def __init__(self, provider: BaseProvider, config: DocPixieConfig):
 28 |         self.provider = provider
 29 |         self.max_turns_before_summary = config.max_conversation_turns
 30 |         self.turns_to_summarize = config.turns_to_summarize
 31 |         self.turns_to_keep_full = config.turns_to_keep_full
 32 | 
 33 |     async def process_conversation_context(
 34 |         self,
 35 |         messages: List[ConversationMessage],
 36 |         current_query: str
 37 |     ) -> Tuple[str, List[ConversationMessage]]:
 38 |         """
 39 |         Process conversation history and return optimized context
 40 | 
 41 |         Args:
 42 |             messages: List of conversation messages
 43 |             current_query: The current user query
 44 | 
 45 |         Returns:
 46 |             Tuple of (processed_context_string, messages_for_display)
 47 | 
 48 |         Raises:
 49 |             ContextProcessingError: If context processing fails
 50 |         """
 51 |         try:
 52 |             # Calculate number of turns (1 turn = 1 user message + 1 assistant message)
 53 |             turns = self._count_turns(messages)
 54 | 
 55 |             if turns <= self.max_turns_before_summary:
 56 |                 # No summarization needed
 57 |                 context = self._format_messages_as_context(messages)
 58 |                 return context, messages
 59 | 
 60 |             logger.info(f"Conversation has {turns} turns, applying context summarization")
 61 | 
 62 |             # Split messages for summarization
 63 |             messages_to_summarize, messages_to_keep = self._split_messages_for_summary(messages)
 64 | 
 65 |             # Summarize the first part
 66 |             summary = await self._summarize_conversation_chunk(messages_to_summarize)
 67 | 
 68 |             # Build final context
 69 |             context_parts = []
 70 | 
 71 |             # Add summary
 72 |             context_parts.append(f"Previous Conversation Summary:\n{summary}\n")
 73 | 
 74 |             # Add recent messages in full
 75 |             if messages_to_keep:
 76 |                 context_parts.append("Recent Conversation:")
 77 |                 context_parts.append(self._format_messages_as_context(messages_to_keep))
 78 | 
 79 |             # Add current query
 80 |             context_parts.append(f"\nCurrent Query: {current_query}")
 81 | 
 82 |             final_context = "\n".join(context_parts)
 83 | 
 84 |             # Create display messages (summary + recent)
 85 |             summary_message = ConversationMessage(
 86 |                 role="system",
 87 |                 content=f"[Conversation Summary of First {self.turns_to_summarize} Turns]\n{summary}"
 88 |             )
 89 |             display_messages = [summary_message] + messages_to_keep
 90 | 
 91 |             return final_context, display_messages
 92 | 
 93 |         except Exception as e:
 94 |             logger.error(f"Context processing failed: {e}")
 95 |             raise ContextProcessingError(f"Failed to process conversation context: {e}")
 96 | 
 97 |     def _count_turns(self, messages: List[ConversationMessage]) -> int:
 98 |         """Count conversation turns (user messages only)"""
 99 |         user_messages = sum(1 for msg in messages if msg.role == "user")
100 |         return user_messages
101 | 
102 |     def _split_messages_for_summary(
103 |         self,
104 |         messages: List[ConversationMessage]
105 |     ) -> Tuple[List[ConversationMessage], List[ConversationMessage]]:
106 |         """Split messages into parts to summarize and keep"""
107 |         # Find the split point based on turns
108 |         turn_count = 0
109 |         split_index = 0
110 | 
111 |         for i in range(0, len(messages), 2):  # Process in pairs
112 |             if i + 1 < len(messages) and messages[i].role == "user":
113 |                 turn_count += 1
114 |                 if turn_count == self.turns_to_summarize:
115 |                     split_index = i + 2  # Include the assistant response
116 |                     break
117 | 
118 |         messages_to_summarize = messages[:split_index]
119 |         messages_to_keep = messages[split_index:]
120 | 
121 |         # Ensure we keep at most the last N turns
122 |         if self.turns_to_keep_full > 0:
123 |             max_messages_to_keep = self.turns_to_keep_full * 2  # Each turn has 2 messages
124 |             if len(messages_to_keep) > max_messages_to_keep:
125 |                 messages_to_keep = messages_to_keep[-max_messages_to_keep:]
126 | 
127 |         return messages_to_summarize, messages_to_keep
128 | 
129 |     def _format_messages_as_context(self, messages: List[ConversationMessage]) -> str:
130 |         """Format messages as readable context"""
131 |         formatted_parts = []
132 | 
133 |         for msg in messages:
134 |             role = "User" if msg.role == "user" else "Assistant"
135 |             formatted_parts.append(f"{role}: {msg.content}")
136 | 
137 |         return "\n\n".join(formatted_parts)
138 | 
139 |     async def _summarize_conversation_chunk(self, messages: List[ConversationMessage]) -> str:
140 |         """Summarize a chunk of conversation"""
141 |         try:
142 |             conversation_text = self._format_messages_as_context(messages)
143 | 
144 |             prompt = CONVERSATION_SUMMARIZATION_PROMPT.format(
145 |                 conversation_text=conversation_text
146 |             )
147 | 
148 |             messages_for_api = [
149 |                 {"role": "system", "content": "You are a helpful assistant that creates concise conversation summaries."},
150 |                 {"role": "user", "content": prompt}
151 |             ]
152 | 
153 |             summary = await self.provider.process_text_messages(
154 |                 messages=messages_for_api,
155 |                 max_tokens=500,
156 |                 temperature=0.3
157 |             )
158 | 
159 |             return summary.strip()
160 | 
161 |         except Exception as e:
162 |             logger.error(f"Conversation summarization failed: {e}")
163 |             raise ContextProcessingError(f"Failed to summarize conversation: {e}")
164 | 


--------------------------------------------------------------------------------
/docpixie/core/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DocPixie Configuration
  3 | Simplified version of production config without embedding/vector DB settings
  4 | """
  5 | 
  6 | import os
  7 | from dataclasses import dataclass, field
  8 | from typing import Tuple, Optional, Dict, Any
  9 | from pathlib import Path
 10 | 
 11 | 
 12 | @dataclass
 13 | class DocPixieConfig:
 14 |     """DocPixie configuration with sensible defaults"""
 15 | 
 16 |     # Document Processing with PyMuPDF
 17 |     pdf_render_scale: float = 2.0  # Higher scale = better quality, larger files
 18 |     pdf_max_image_size: Tuple[int, int] = (1200, 1200)
 19 |     jpeg_quality: int = 90
 20 |     thumbnail_size: Tuple[int, int] = (256, 256)  # For quick page selection
 21 | 
 22 |     # Processing settings
 23 |     vision_detail: str = "high"  # Use full resolution for best quality
 24 | 
 25 |     # Storage
 26 |     storage_type: str = "local"  # local, memory, s3
 27 |     local_storage_path: str = "./docpixie_data"
 28 | 
 29 |     # AI Provider Settings (Provider-agnostic)
 30 |     provider: str = "openai"  # openai, anthropic, openrouter
 31 |     model: str = "gpt-4o"  # Primary model for all operations
 32 |     vision_model: str = "gpt-4o"  # Vision model for multimodal analysis
 33 | 
 34 |     # API keys loaded from environment variables only
 35 |     openai_api_key: Optional[str] = None
 36 |     anthropic_api_key: Optional[str] = None
 37 |     openrouter_api_key: Optional[str] = None
 38 | 
 39 |     # Agent Settings
 40 |     max_agent_iterations: int = 5  # Maximum adaptive planning iterations
 41 |     max_pages_per_task: int = 6    # Maximum pages to analyze per task
 42 |     max_tasks_per_plan: int = 4    # Maximum tasks in initial plan
 43 | 
 44 |     # Conversation Processing Settings
 45 |     max_conversation_turns: int = 8  # When to start summarizing conversation
 46 |     turns_to_summarize: int = 5      # How many turns to summarize
 47 |     turns_to_keep_full: int = 3      # How many recent turns to keep in full
 48 | 
 49 |     # Logging
 50 |     log_level: str = "INFO"
 51 |     log_requests: bool = False
 52 | 
 53 |     def __post_init__(self):
 54 |         """Initialize and validate configuration"""
 55 |         # Create storage directory if it doesn't exist
 56 |         if self.storage_type == "local":
 57 |             Path(self.local_storage_path).mkdir(parents=True, exist_ok=True)
 58 | 
 59 |         # Load API keys from environment if not provided
 60 |         if not self.openai_api_key:
 61 |             self.openai_api_key = os.getenv("OPENAI_API_KEY")
 62 | 
 63 |         if not self.anthropic_api_key:
 64 |             self.anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 65 | 
 66 |         if not self.openrouter_api_key:
 67 |             self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
 68 | 
 69 |         # Set provider-specific default models if using defaults
 70 |         self._set_provider_defaults()
 71 | 
 72 |         # Skip validation with test API keys (for testing)
 73 |         if self.openai_api_key != "test-key" and self.anthropic_api_key != "test-key" and self.openrouter_api_key != "test-key":
 74 |             # Validate required settings based on provider
 75 |             if self.provider == "openai" and not self.openai_api_key:
 76 |                 raise ValueError("OpenAI API key is required when using OpenAI provider")
 77 | 
 78 |             if self.provider == "anthropic" and not self.anthropic_api_key:
 79 |                 raise ValueError("Anthropic API key is required when using Anthropic provider")
 80 | 
 81 |             if self.provider == "openrouter" and not self.openrouter_api_key:
 82 |                 raise ValueError("OpenRouter API key is required when using OpenRouter provider")
 83 | 
 84 |         # Validate image settings
 85 |         if self.pdf_render_scale <= 0:
 86 |             raise ValueError("PDF render scale must be positive")
 87 | 
 88 |         if self.jpeg_quality < 1 or self.jpeg_quality > 100:
 89 |             raise ValueError("JPEG quality must be between 1 and 100")
 90 | 
 91 |     def _set_provider_defaults(self):
 92 |         """Set appropriate default models based on provider"""
 93 |         provider_defaults = {
 94 |             "openai": {
 95 |                 "model": "gpt-4o",
 96 |                 "vision_model": "gpt-4o"
 97 |             },
 98 |             "anthropic": {
 99 |                 "model": "claude-3-opus-20240229",
100 |                 "vision_model": "claude-3-opus-20240229"
101 |             },
102 |             "openrouter": {
103 |                 "model": "openai/gpt-4o",
104 |                 "vision_model": "openai/gpt-4o"
105 |             }
106 |         }
107 | 
108 |         if self.provider in provider_defaults:
109 |             defaults = provider_defaults[self.provider]
110 |             # Only update if still using OpenAI defaults (means user didn't specify custom models)
111 |             if self.model == "gpt-4o":
112 |                 self.model = defaults["model"]
113 |             if self.vision_model == "gpt-4o":
114 |                 self.vision_model = defaults["vision_model"]
115 | 
116 |     @classmethod
117 |     def from_dict(cls, config_dict: Dict[str, Any]) -> 'DocPixieConfig':
118 |         """Create config from dictionary"""
119 |         return cls(**config_dict)
120 | 
121 |     @classmethod
122 |     def from_env(cls) -> 'DocPixieConfig':
123 |         """Create config from environment variables"""
124 |         config_dict = {}
125 | 
126 |         # Map environment variables to config fields
127 |         env_mapping = {
128 |             'DOCPIXIE_PROVIDER': 'provider',
129 |             'DOCPIXIE_STORAGE_PATH': 'local_storage_path',
130 |             'DOCPIXIE_JPEG_QUALITY': 'jpeg_quality',
131 |             'DOCPIXIE_LOG_LEVEL': 'log_level',
132 |         }
133 | 
134 |         for env_var, config_field in env_mapping.items():
135 |             value = os.getenv(env_var)
136 |             if value is not None:
137 |                 # Convert string values to appropriate types
138 |                 if config_field in ['jpeg_quality']:
139 |                     config_dict[config_field] = int(value)
140 |                 elif config_field in ['enable_cache']:
141 |                     config_dict[config_field] = value.lower() in ('true', '1', 'yes')
142 |                 else:
143 |                     config_dict[config_field] = value
144 | 
145 |         return cls(**config_dict)
146 | 
147 |     def get_query_config(self) -> Dict[str, Any]:
148 |         """Get configuration for query processing"""
149 |         return {
150 |             'vision_detail': self.vision_detail,
151 |             'model': self.model
152 |         }
153 | 
154 |     def validate_provider_config(self) -> None:
155 |         """Validate provider-specific configuration"""
156 |         if self.provider == "openai":
157 |             if not self.openai_api_key:
158 |                 raise ValueError("OpenAI API key is required")
159 |         elif self.provider == "anthropic":
160 |             if not self.anthropic_api_key:
161 |                 raise ValueError("Anthropic API key is required")
162 |         elif self.provider == "openrouter":
163 |             if not self.openrouter_api_key:
164 |                 raise ValueError("OpenRouter API key is required")
165 |         else:
166 |             raise ValueError(f"Unsupported provider: {self.provider}")
167 | 


--------------------------------------------------------------------------------
/docpixie/providers/anthropic.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Anthropic Claude provider for raw API operations
  3 | """
  4 | 
  5 | import logging
  6 | from typing import List, Dict, Any
  7 | 
  8 | from .base import BaseProvider, ProviderError
  9 | from ..core.config import DocPixieConfig
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | class AnthropicProvider(BaseProvider):
 15 |     """Anthropic Claude provider for raw API operations"""
 16 |     
 17 |     def __init__(self, config: DocPixieConfig):
 18 |         super().__init__(config)
 19 |         
 20 |         if not config.anthropic_api_key:
 21 |             raise ValueError("Anthropic API key is required")
 22 |         
 23 |         # Import here to make it optional dependency
 24 |         try:
 25 |             import anthropic
 26 |             self.client = anthropic.AsyncAnthropic(api_key=config.anthropic_api_key)
 27 |         except ImportError:
 28 |             raise ImportError("Anthropic library not found. Install with: pip install anthropic")
 29 |         
 30 |         self.model = config.vision_model  # Use vision model for multimodal operations
 31 |     
 32 |     async def process_text_messages(
 33 |         self, 
 34 |         messages: List[Dict[str, Any]], 
 35 |         max_tokens: int = 300, 
 36 |         temperature: float = 0.3
 37 |     ) -> str:
 38 |         """Process text-only messages through Anthropic API"""
 39 |         try:
 40 |             # Convert system message format for Anthropic
 41 |             claude_messages = self._prepare_claude_text_messages(messages)
 42 |             
 43 |             response = await self.client.messages.create(
 44 |                 model=self.model,
 45 |                 max_tokens=max_tokens,
 46 |                 temperature=temperature,
 47 |                 messages=claude_messages
 48 |             )
 49 |             
 50 |             result = response.content[0].text.strip()
 51 |             logger.debug(f"Anthropic text response: {result[:50]}...")
 52 |             
 53 |             return result
 54 |             
 55 |         except Exception as e:
 56 |             logger.error(f"Anthropic text processing failed: {e}")
 57 |             raise ProviderError(f"Text processing failed: {e}", "anthropic")
 58 |     
 59 |     async def process_multimodal_messages(
 60 |         self, 
 61 |         messages: List[Dict[str, Any]], 
 62 |         max_tokens: int = 300, 
 63 |         temperature: float = 0.3
 64 |     ) -> str:
 65 |         """Process multimodal messages (text + images) through Anthropic Vision API"""
 66 |         try:
 67 |             # Process messages to convert image paths to base64
 68 |             claude_messages = self._prepare_claude_multimodal_messages(messages)
 69 |             
 70 |             response = await self.client.messages.create(
 71 |                 model=self.model,
 72 |                 max_tokens=max_tokens,
 73 |                 temperature=temperature,
 74 |                 messages=claude_messages
 75 |             )
 76 |             
 77 |             result = response.content[0].text.strip()
 78 |             logger.debug(f"Anthropic multimodal response: {result[:50]}...")
 79 |             
 80 |             return result
 81 |             
 82 |         except Exception as e:
 83 |             logger.error(f"Anthropic multimodal processing failed: {e}")
 84 |             raise ProviderError(f"Multimodal processing failed: {e}", "anthropic")
 85 |     
 86 |     def _prepare_claude_text_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 87 |         """Prepare text-only messages for Claude API (handle system messages)"""
 88 |         claude_messages = []
 89 |         
 90 |         for message in messages:
 91 |             if message["role"] == "system":
 92 |                 # Claude handles system messages differently - we'll prepend to first user message
 93 |                 continue
 94 |             else:
 95 |                 claude_messages.append(message)
 96 |         
 97 |         # Prepend system message content to first user message if present
 98 |         system_content = None
 99 |         for message in messages:
100 |             if message["role"] == "system":
101 |                 system_content = message["content"]
102 |                 break
103 |         
104 |         if system_content and claude_messages and claude_messages[0]["role"] == "user":
105 |             # Prepend system content to first user message
106 |             original_content = claude_messages[0]["content"]
107 |             claude_messages[0]["content"] = f"{system_content}\n\n{original_content}"
108 |         
109 |         return claude_messages
110 |     
111 |     def _prepare_claude_multimodal_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
112 |         """Prepare multimodal messages for Claude API by converting image paths to base64"""
113 |         claude_messages = []
114 |         system_content = None
115 |         
116 |         # Extract system message
117 |         for message in messages:
118 |             if message["role"] == "system":
119 |                 system_content = message["content"]
120 |                 break
121 |         
122 |         for message in messages:
123 |             if message["role"] == "system":
124 |                 continue  # Skip system message, will be prepended to user message
125 |             elif message["role"] == "user" and isinstance(message["content"], list):
126 |                 # User message with multimodal content
127 |                 processed_content = []
128 |                 
129 |                 for content_item in message["content"]:
130 |                     if content_item["type"] == "text":
131 |                         processed_content.append(content_item)
132 |                     elif content_item["type"] == "image_path":
133 |                         # Convert image path to Claude format
134 |                         image_path = content_item["image_path"]
135 |                         if self._validate_image_path(image_path):
136 |                             encoded_image = self._encode_image(image_path)
137 |                             processed_content.append({
138 |                                 "type": "image",
139 |                                 "source": {
140 |                                     "type": "base64",
141 |                                     "media_type": "image/jpeg",
142 |                                     "data": encoded_image
143 |                                 }
144 |                             })
145 |                         else:
146 |                             logger.warning(f"Skipping invalid image path: {image_path}")
147 |                     else:
148 |                         # Pass through other content types
149 |                         processed_content.append(content_item)
150 |                 
151 |                 # Prepend system content to first user message
152 |                 if system_content and len(claude_messages) == 0:
153 |                     processed_content.insert(0, {
154 |                         "type": "text",
155 |                         "text": system_content
156 |                     })
157 |                 
158 |                 claude_messages.append({
159 |                     "role": message["role"],
160 |                     "content": processed_content
161 |                 })
162 |             else:
163 |                 # Regular text message
164 |                 claude_messages.append(message)
165 |         
166 |         return claude_messages


--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
  1 | # CLAUDE.md
  2 | 
  3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
  4 | 
  5 | ## Project Overview
  6 | 
  7 | DocPixie is a lightweight multimodal RAG library that uses vision AI instead of embeddings/vector databases. Documents are processed as images and analyzed using vision language models for both understanding and page selection.
  8 | 
  9 | ## Development Commands
 10 | 
 11 | ### Environment Setup
 12 | ```bash
 13 | # Set up virtual environment with uv (recommended)
 14 | uv venv
 15 | source .venv/bin/activate
 16 | 
 17 | uv pip install docpixie
 18 | ```
 19 | 
 20 | Start the CLI:
 21 | ```bash
 22 | docpixie
 23 | ```
 24 | 
 25 | ## Core Architecture
 26 | 
 27 | ### Provider System
 28 | The codebase uses a clean separation between **raw API operations** and **business logic**:
 29 | 
 30 | - **Providers** (`docpixie/providers/`): Handle only raw API calls with generic `process_text_messages()` and `process_multimodal_messages()` methods
 31 | - **AI Operations** (`docpixie/ai/`): Contain all business logic, prompt construction, and workflow orchestration
 32 | 
 33 | ### Key Architectural Principles
 34 | 
1. **Provider-Agnostic Configuration**: Uses generic `model` and `vision_model` fields that work across all providers
 36 | 2. **Automatic Provider Defaults**: `DocPixieConfig._set_provider_defaults()` sets appropriate models based on selected provider
3. **Image-Based Processing**: All documents are converted to images via PyMuPDF, preserving visual information
 38 | 4. **Adaptive RAG Agent**: Single adaptive mode that dynamically plans and re-evaluates tasks based on findings (replaces Flash/Pro modes in Phase 2)
 39 | 
 40 | ### Provider Implementation Pattern
 41 | When adding new providers:
 42 | 1. Inherit from `BaseProvider`
 43 | 2. Implement only `process_text_messages()` and `process_multimodal_messages()`
 44 | 3. Handle provider-specific message formatting (e.g., image_path → provider format)
 45 | 4. Add to `providers/factory.py` and provider defaults in `config.py`
 46 | 
 47 | Example: OpenRouter provider uses OpenAI client with `base_url="https://openrouter.ai/api/v1"`
 48 | 
 49 | ### Document Processing Flow
 50 | 1. **PDF → Images**: PyMuPDF converts PDF pages to optimized JPEGs
 51 | 2. **Storage**: Local filesystem or in-memory storage via pluggable backends
 52 | 3. **Summarization**: Vision models analyze all page images in single API call for document summary
 53 | 4. **Adaptive RAG Pipeline** (Phase 2): Vision-based page selection + dynamic task planning + conversation processing
 54 | 
 55 | ### Configuration System
 56 | - Environment-first approach: API keys loaded from `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`
 57 | - Provider-agnostic model configuration
 58 | - Agent-specific settings (max iterations, pages per task, conversation context)
 59 | - Test API key support: Use `"test-key"` to bypass validation during testing
 60 | 
 61 | ### File Structure Significance
 62 | 
 63 | ```
 64 | docpixie/
 65 | ├── core/config.py          # Central configuration with provider defaults
 66 | ├── providers/              # Raw API operations only
 67 | │   ├── base.py            # Generic message processing interface
 68 | │   ├── openai.py          # OpenAI API client
 69 | │   ├── anthropic.py       # Claude API client (handles different system message format)
 70 | │   ├── openrouter.py      # OpenRouter using OpenAI client + different base_url
 71 | │   └── factory.py         # Provider creation and validation
 72 | ├── ai/                     # Business logic layer
 73 | │   ├── summarizer.py      # Page/document summarization workflows
 74 | │   ├── agent.py           # Main adaptive RAG agent orchestrator
 75 | │   ├── task_planner.py    # Dynamic task planning with document selection
 76 | │   ├── page_selector.py   # Vision-based page selection
 77 | │   ├── context_processor.py # Conversation summarization
 78 | │   ├── query_reformulator.py # Reference resolution
 79 | │   ├── query_classifier.py # Document need classification
 80 | │   ├── synthesizer.py     # Response synthesis
 81 | │   └── prompts.py         # All AI prompts
 82 | ├── processors/             # Document-to-image conversion
 83 | │   ├── pdf.py             # PyMuPDF implementation
 84 | │   └── factory.py         # Auto-detection of processor type
 85 | ├── storage/                # Pluggable storage backends
 86 | │   ├── local.py           # Filesystem storage
 87 | │   └── memory.py          # In-memory storage (for testing)
 88 | ├── models/
 89 | │   ├── document.py        # Core data models without embeddings
 90 | │   └── agent.py           # Agent task/plan data models
 91 | ├── exceptions.py          # Custom exception classes
 92 | └── __init__.py            # Main API entry point
 93 | ```
 94 | 
 95 | ## Important Implementation Details
 96 | 
 97 | ### Configuration Testing
 98 | Never use test mode flags. Instead, use test API keys (`"test-key"`) which automatically bypass validation.
 99 | 
100 | ### Document Summarization
101 | The critical architectural decision: document summaries use ALL page images in a single vision API call, not individual page summaries combined. This preserves visual context and document structure.
102 | 
103 | ### Provider Message Format
104 | All providers receive messages with `image_path` type, then convert to their specific format:
105 | - OpenAI: `image_url` with data URL
106 | - Anthropic: `image` with base64 data
107 | - OpenRouter: Same as OpenAI
108 | 
109 | ### Adaptive RAG Agent Implementation
110 | The agent operates in a single adaptive mode with dynamic task planning:
111 | 1. **Context Processing**: Summarizes conversation when > 8 turns
112 | 2. **Query Reformulation**: Resolves references using context (outputs JSON)
113 | 3. **Query Classification**: Determines if documents needed (reasoning + needs_documents)
114 | 4. **Task Planning**: Creates 2-4 focused tasks with single document assignments
115 | 5. **Adaptive Execution**: Re-evaluates and modifies task list after each completion
116 | 6. **Response Synthesis**: Combines all task findings into comprehensive response
117 | 
118 | ## Environment Variables
119 | 
120 | ```bash
121 | # Required for respective providers
122 | OPENAI_API_KEY=your_openai_key
123 | ANTHROPIC_API_KEY=your_anthropic_key
124 | OPENROUTER_API_KEY=your_openrouter_key
125 | 
126 | # Optional configuration overrides
127 | DOCPIXIE_PROVIDER=openai|anthropic|openrouter
128 | DOCPIXIE_STORAGE_PATH=./docpixie_data
129 | DOCPIXIE_MAX_AGENT_ITERATIONS=5
130 | DOCPIXIE_JPEG_QUALITY=90
131 | ```
132 | 
133 | ## Development Guidelines
134 | 
135 | ### Code Modification Priority
136 | **CRITICAL**: When implementing new features, always prioritize modifying existing code over creating new files or methods unless absolutely necessary. This maintains codebase coherence and avoids unnecessary duplication.
137 | 
138 | ### Error Handling Philosophy
139 | Error handling should be simple and direct - raise appropriate custom exceptions from `docpixie/exceptions.py` instead of implementing fallback mechanisms. This ensures clear failure modes and easier debugging.
140 | 
141 | ### Prompt Management
142 | All AI prompts must be centralized in `docpixie/ai/prompts.py`. This includes system prompts, user prompts, and any template strings used for AI interactions. Never embed prompts directly in component files.
143 | 
144 | ### Agent Task Architecture
145 | Each agent task should be assigned to exactly **one document** (not multiple). This simplifies page selection and analysis while maintaining clear scope boundaries.
146 | 


--------------------------------------------------------------------------------
/docs/cli-tool.md:
--------------------------------------------------------------------------------
  1 | # DocPixie CLI Tool
  2 | 
  3 | DocPixie includes a modern, interactive terminal interface built with Textual that provides a beautiful and intuitive way to chat with your documents.
  4 | 
  5 | ## 🚀 Quick Start
  6 | 
  7 | ### Starting the CLI
  8 | 
  9 | ```bash
 10 | # Start the interactive CLI
 11 | docpixie
 12 | ```
 13 | 
 14 | ## 🎛️ First-Time Setup
 15 | 
 16 | When you first run the CLI, you'll be prompted to enter your API key:
 17 | 
 18 | ```
 19 | ┌─────────────────────────────────────────────────────────────────┐
 20 | │                          Welcome to DocPixie!                   │
 21 | │                                                                  │
 22 | │          DocPixie needs an OpenRouter API key to work           │
 23 | │                    with documents.                               │
 24 | │                                                                  │
 25 | │              Get your API key from:                              │
 26 | │                   https://openrouter.ai/keys                    │
 27 | │                                                                  │
 28 | │    [                API Key Input                         ]     │
 29 | │                                                                  │
 30 | │         Press Enter to confirm • Press Esc to quit              │
 31 | └─────────────────────────────────────────────────────────────────┘
 32 | ```
 33 | 
 34 | > **Note**: While the setup screen mentions OpenRouter, DocPixie CLI supports all providers (OpenAI, Anthropic, OpenRouter). You can set any provider's API key as an environment variable before starting the CLI.
 35 | 
 36 | ## 🎨 Interface Overview
 37 | 
 38 | The CLI interface consists of several key areas:
 39 | 
 40 | ```
 41 | ┌─ DocPixie ──────────────────────────────────── 12:34:56 PM ─┐
 42 | │                                                               │
 43 | │  ┌─ Chat Area ───────────────────────────────────────────┐  │
 44 | │  │                                                        │  │
 45 | │  │  Welcome to DocPixie!                                  │  │
 46 | │  │  2 documents indexed and ready!                       │  │
 47 | │  │                                                        │  │
 48 | │  │  Start chatting or type / for commands               │  │
 49 | │  └────────────────────────────────────────────────────────┘  │
 50 | │                                                               │
 51 | │  Status: Ready • 2 documents indexed                         │
 52 | │                                                               │
 53 | │  > [                 Input Area                        ]     │
 54 | │                                                               │
 55 | │  Enter to send • Shift+Enter for new line • Ctrl+/ commands │
 56 | │                                                               │
 57 | ├─────────────────────────────────────────────────────────────┤
 58 | │ ^N New  ^L Conversations  ^O Models  ^D Docs  ^/ Cmds  ^Q   │
 59 | └─────────────────────────────────────────────────────────────┘
 60 | ```
 61 | 
 62 | ## ⌨️ Keyboard Shortcuts
 63 | 
 64 | ### Global Shortcuts
 65 | 
 66 | | Shortcut | Action | Description |
 67 | |----------|--------|-------------|
 68 | | `Ctrl+N` | New Conversation | Start a fresh conversation |
 69 | | `Ctrl+L` | Conversations | Manage conversation history |
 70 | | `Ctrl+O` | Model Config | Configure AI models/providers |
 71 | | `Ctrl+D` | Documents | Manage documents |
 72 | | `Ctrl+/` | Commands | Toggle command palette |
 73 | | `Ctrl+Q` | Quit | Exit the application |
 74 | 
 75 | ### Chat Input Shortcuts
 76 | 
 77 | | Shortcut | Action | Description |
 78 | |----------|--------|-------------|
 79 | | `Enter` | Send Message | Submit your message |
 80 | | `Shift+Enter` | New Line | Add line break in message |
 81 | 
 82 | ## 🛠️ Command System
 83 | 
 84 | DocPixie CLI includes a powerful command system. Type `/` to open the command palette or use slash commands directly.
 85 | 
 86 | ### Available Commands
 87 | 
 88 | #### `/new` - New Conversation
 89 | Starts a fresh conversation, clearing chat history.
 90 | 
 91 | ```
 92 | > /new
 93 | ```
 94 | 
 95 | #### `/clear` - Clear Chat
 96 | Clears the current chat display (conversation is still saved).
 97 | 
 98 | ```
 99 | > /clear
100 | ```
101 | 
102 | #### `/save` - Save Conversation
103 | Manually saves the current conversation to history.
104 | 
105 | ```
106 | > /save
107 | ```
108 | 
109 | #### `/conversations` - Conversation Manager
110 | Opens the conversation management dialog where you can:
111 | - View conversation history
112 | - Load previous conversations
113 | - Delete old conversations
114 | 
115 | #### `/model` - Model Configuration
116 | Opens the model selector where you can:
117 | - Switch between providers (OpenAI, Anthropic, OpenRouter)
118 | - Configure model settings
119 | - View current model status
120 | 
121 | #### `/documents` - Document Manager
122 | Opens the document management interface where you can:
123 | - View indexed documents
124 | - Add new documents
125 | - Remove documents from the index
126 | - See document statistics
127 | 
128 | #### `/exit` - Exit Application
129 | Saves the current conversation and exits the CLI.
130 | 
131 | ```
132 | > /exit
133 | ```
134 | 
135 | ### Command Palette
136 | 
137 | Press `Ctrl+/` or type `/` to open the interactive command palette:
138 | 
139 | ```
140 | ┌─ Commands ─────────────────────────────────────────────────┐
141 | │                                                             │
142 | │  > /new                     Start new conversation         │
143 | │    /clear                   Clear current chat             │
144 | │    /save                    Save conversation               │
145 | │    /conversations           Manage conversations           │
146 | │    /model                   Configure AI model             │
147 | │    /documents               Manage documents               │
148 | │    /exit                    Exit DocPixie                  │
149 | │                                                             │
150 | └─────────────────────────────────────────────────────────────┘
151 | ```
152 | 
153 | Use arrow keys to navigate and Enter to select a command.
154 | 
155 | ## 📚 Document Management
156 | 
157 | ### Adding Documents
158 | 
159 | The CLI automatically discovers and indexes PDF files from a `./documents` directory in your current working directory. Simply:
160 | 
161 | 1. Create a `./documents` folder
162 | 2. Copy your PDF files into it
3. Restart the CLI or use the `/documents` command to refresh
166 | 
167 | ### Supported File Types
168 | 
169 | - **PDF files** (.pdf) - Multi-page documents
170 | 
171 | ### Features
172 | 
173 | - **Auto-save**: Conversations are automatically saved
174 | - **Context awareness**: Previous messages provide context for new queries
175 | - **Search**: Find conversations by content or title
176 | - **Export**: Save conversations to text files
177 | 
178 | ## 🎯 Chat Features
179 | 
180 | ### Smart Document Analysis
181 | 
182 | DocPixie's CLI uses an adaptive RAG agent that:
183 | 
184 | 1. **Analyzes your question** to determine if documents are needed
185 | 2. **Plans tasks** dynamically based on available documents
186 | 3. **Selects relevant pages** using vision AI
187 | 4. **Synthesizes responses** from multiple sources
188 | 5. **Maintains context** across conversation turns
189 | 
190 | ## ⚙️ Configuration
191 | 
192 | ### CLI Settings
193 | 
194 | The CLI stores settings in:
195 | - **macOS/Linux**: `~/.docpixie/config.json`
196 | - **Windows**: `%APPDATA%\.docpixie\config.json`
197 | 
198 | ---
199 | 
200 | The DocPixie CLI provides a powerful, interactive way to work with your documents. Its adaptive AI agent, beautiful interface, and comprehensive features make document analysis both efficient and enjoyable.
201 | 
202 | Happy chatting! 🚀
203 | 


--------------------------------------------------------------------------------
/docpixie/cli/state_manager.py:
--------------------------------------------------------------------------------
  1 | """
  2 | State management for DocPixie CLI application
  3 | """
  4 | 
  5 | from pathlib import Path
  6 | from typing import List, Optional, Any, Set
  7 | from docpixie import ConversationMessage
  8 | from docpixie.models.document import Document
  9 | from .config import get_config_manager
 10 | from .conversation_storage import ConversationStorage
 11 | 
 12 | 
 13 | class AppStateManager:
 14 |     """Manages application state including conversations, documents, and UI state"""
 15 |     
 16 |     def __init__(self):
 17 |         self.indexed_documents: List[Document] = []
 18 |         self.conversation_history: List[ConversationMessage] = []
 19 |         self.current_conversation_id: Optional[str] = None
 20 |         self.documents_folder = Path("./documents")
 21 |         self.processing = False
 22 |         
 23 |         self.command_palette_active = False
 24 |         self.partial_command = ""
 25 |         self.default_input_hint = (
 26 |             "Press / for commands • Shift+Enter: new line • Shift+Tab: switch panel"
 27 |         )
 28 |         
 29 |         self.current_plan: Optional[Any] = None
 30 |         self.completed_tasks: Set = set()
 31 |         
 32 |         self.config_manager = get_config_manager()
 33 |         self.conversation_storage = ConversationStorage()
 34 |     
 35 |     def get_status_text(self) -> str:
 36 |         """Get current status bar text with emoji prefixes"""
 37 |         text_model, vision_model = self.config_manager.get_models()
 38 |         doc_count = len(self.indexed_documents)
 39 | 
 40 |         segments = [
 41 |             f"📄: {doc_count}",
 42 |             f"🧠: {text_model.split('/')[-1]}",
 43 |             f"👁️: {vision_model.split('/')[-1]}",
 44 |         ]
 45 | 
 46 |         if self.current_conversation_id:
 47 |             conversations = self.conversation_storage.list_local_conversations()
 48 |             current_conv = next(
 49 |                 (conv for conv in conversations if conv.id == self.current_conversation_id),
 50 |                 None,
 51 |             )
 52 |             if current_conv:
 53 |                 # Conversation name (truncate to 20 chars, add ellipsis if longer)
 54 |                 conv_name = current_conv.name[:20] + ("..." if len(current_conv.name) > 20 else "")
 55 |                 segments.append(f"💬: {conv_name}")
 56 | 
 57 |                 # Total cost formatting
 58 |                 total_cost = getattr(current_conv, "total_cost", 0.0) or 0.0
 59 |                 if total_cost < 0.01:
 60 |                     segments.append(f"💰: {total_cost:.6f}")
 61 |                 else:
 62 |                     segments.append(f"💰: {total_cost:.4f}")
 63 | 
 64 |         return " | ".join(segments)
 65 |     
 66 |     def add_document(self, document: Document) -> None:
 67 |         """Add a document to the indexed documents list"""
 68 |         if not any(existing.id == document.id for existing in self.indexed_documents):
 69 |             self.indexed_documents.append(document)
 70 |     
 71 |     def remove_document(self, document_id: str) -> bool:
 72 |         """Remove a document from the indexed documents list"""
 73 |         for doc in self.indexed_documents[:]:
 74 |             if doc.id == document_id:
 75 |                 self.indexed_documents.remove(doc)
 76 |                 return True
 77 |         return False
 78 |     
 79 |     def clear_documents(self) -> None:
 80 |         """Clear all indexed documents"""
 81 |         self.indexed_documents.clear()
 82 |     
 83 |     def add_conversation_message(self, message: ConversationMessage) -> None:
 84 |         """Add a message to conversation history"""
 85 |         self.conversation_history.append(message)
 86 |     
 87 |     def limit_conversation_history(self, max_messages: int = 20) -> None:
 88 |         """Limit conversation history to maximum number of messages"""
 89 |         if len(self.conversation_history) > max_messages:
 90 |             self.conversation_history = self.conversation_history[-max_messages:]
 91 |     
 92 |     def clear_conversation_history(self) -> None:
 93 |         """Clear conversation history"""
 94 |         self.conversation_history = []
 95 |     
 96 |     def set_current_conversation(self, conversation_id: Optional[str]) -> None:
 97 |         """Set the current conversation ID"""
 98 |         self.current_conversation_id = conversation_id
 99 |     
100 |     def create_new_conversation(self) -> str:
101 |         """Create a new conversation and return its ID"""
102 |         doc_ids = [doc.id for doc in self.indexed_documents]
103 |         self.current_conversation_id = self.conversation_storage.create_new_conversation(doc_ids)
104 |         self.conversation_history = []
105 |         return self.current_conversation_id
106 |     
107 |     def load_conversation(self, conversation_id: str) -> bool:
108 |         """Load a conversation by ID"""
109 |         result = self.conversation_storage.load_conversation(conversation_id)
110 |         if result:
111 |             metadata, messages = result
112 |             self.current_conversation_id = conversation_id
113 |             self.conversation_history = messages
114 |             return True
115 |         return False
116 |     
117 |     def save_current_conversation(self) -> None:
118 |         """Save the current conversation if it exists"""
119 |         if self.current_conversation_id and self.conversation_history:
120 |             doc_ids = [doc.id for doc in self.indexed_documents]
121 |             self.conversation_storage.save_conversation(
122 |                 self.current_conversation_id,
123 |                 self.conversation_history,
124 |                 doc_ids
125 |             )
126 |     
    def get_last_conversation_id(self) -> Optional[str]:
        """Return the result of the conversation storage's ``get_last_conversation()``."""
        return self.conversation_storage.get_last_conversation()
130 |     
    def set_processing(self, processing: bool) -> None:
        """Store ``processing`` as the current processing flag."""
        self.processing = processing
134 |     
    def is_processing(self) -> bool:
        """Return the stored processing flag."""
        return self.processing
138 |     
    def set_command_palette_active(self, active: bool) -> None:
        """Record whether the command palette is currently active."""
        self.command_palette_active = active
142 |     
    def is_command_palette_active(self) -> bool:
        """Return the stored command-palette-active flag."""
        return self.command_palette_active
146 |     
    def set_partial_command(self, command: str) -> None:
        """Store ``command`` as the partial command text."""
        self.partial_command = command
150 |     
    def get_partial_command(self) -> str:
        """Return the stored partial command text."""
        return self.partial_command
154 |     
    def set_current_plan(self, plan: Optional[Any]) -> None:
        """Store ``plan`` (or None) as the current task plan."""
        self.current_plan = plan
158 |     
    def get_current_plan(self) -> Optional[Any]:
        """Return the stored task plan, which may be None."""
        return self.current_plan
162 |     
163 |     def clear_task_plan(self) -> None:
164 |         """Clear current task plan and completed tasks"""
165 |         self.current_plan = None
166 |         self.completed_tasks.clear()
167 |     
    def add_completed_task(self, task_name: str) -> None:
        """Add ``task_name`` to the collection of completed tasks."""
        self.completed_tasks.add(task_name)
171 |     
172 |     def get_completed_tasks(self) -> List[str]:
173 |         """Get list of completed task names"""
174 |         return list(self.completed_tasks)
175 |     
176 |     def has_documents(self) -> bool:
177 |         """Check if any documents are indexed"""
178 |         return len(self.indexed_documents) > 0
179 |     
180 |     def has_conversation_history(self) -> bool:
181 |         """Check if conversation history exists"""
182 |         return len(self.conversation_history) > 0
183 | 


--------------------------------------------------------------------------------
/docpixie/cli/widgets/command_palette.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Command palette widget for DocPixie CLI
  3 | Provides quick access to all commands with filtering and navigation
  4 | """
  5 | 
  6 | from typing import List, Dict, Callable, Optional
  7 | from textual.widgets import Static, ListView, ListItem, Label
  8 | from textual.containers import Container, Vertical
  9 | from textual.reactive import reactive
 10 | from textual.message import Message
 11 | from textual import events
 12 | from rich.text import Text
 13 | 
 14 | 
 15 | class CommandItem:
 16 |     """Represents a single command in the palette"""
 17 |     
 18 |     def __init__(self, command: str, description: str, handler: Callable = None):
 19 |         self.command = command
 20 |         self.description = description
 21 |         self.handler = handler
 22 |     
 23 |     def __str__(self) -> str:
 24 |         return f"{self.command} - {self.description}"
 25 | 
 26 | 
 27 | class DocPixieCommandPalette(Container):
 28 |     """Command palette overlay widget for DocPixie"""
 29 |     
 30 |     DEFAULT_CSS = """
 31 |     DocPixieCommandPalette {
 32 |         display: none;
 33 |         layer: overlay;
 34 |         dock: bottom;
 35 |         offset: 0 -4;  /* Position above input area */
 36 |         width: 80;
 37 |         height: auto;
 38 |         max-height: 15;
 39 |         background: #2d1f2d;   /* match app background */
 40 |         border: solid #ff99cc; /* brand pink border */
 41 |         padding: 1;
 42 |         align: center bottom;
 43 |     }
 44 |     
 45 |     DocPixieCommandPalette.visible {
 46 |         display: block;
 47 |     }
 48 |     
 49 |     #command-list {
 50 |         height: auto;
 51 |         max-height: 12;
 52 |         scrollbar-background: #2d1f2d;
 53 |         scrollbar-color: #ff99cc; /* brand pink scrollbar */
 54 |     }
 55 |     
 56 |     .command-item {
 57 |         height: 1;
 58 |         padding: 0 1;
 59 |     }
 60 |     
 61 |     .command-item.--highlight {
 62 |         background: #4a3344;
 63 |         color: $text;
 64 |     }
 65 |     
 66 |     .command-item-selected {
 67 |         background: #4a3344;
 68 |         border-left: thick #ff99cc;
 69 |         color: $text;
 70 |     }
 71 |     
 72 |     #filter-display {
 73 |         background: #2d1f2d;
 74 |         color: #ff99cc;
 75 |         height: 1;
 76 |         padding: 0 1;
 77 |         margin: 0 0 1 0;
 78 |     }
 79 |     """
 80 |     
 81 |     COMMANDS = [
 82 |         CommandItem("/new", "Start a new conversation (Ctrl+N)"),
 83 |         CommandItem("/conversations", "Switch between conversations (Ctrl+L)"),
 84 |         CommandItem("/save", "Save current conversation"),
 85 |         CommandItem("/clear", "Clear current chat display"),
 86 |         CommandItem("/model", "Configure Planning and Vision models (Ctrl+M)"),
 87 |         CommandItem("/documents", "Manage and index documents (Ctrl+D)"),
 88 |         CommandItem("/help", "Show all available commands"),
 89 |         CommandItem("/exit", "Exit the program (Ctrl+Q)"),
 90 |     ]
 91 |     
 92 |     def __init__(self, **kwargs):
 93 |         super().__init__(**kwargs)
 94 |         self.filtered_commands: List[CommandItem] = []
 95 |         self.selected_index = 0
 96 |         self.current_filter = ""
 97 |         self.command_items: List[ListItem] = []
 98 |     
 99 |     def compose(self):
100 |         """Create the command palette UI"""
101 |         with Vertical():
102 |             yield Static("Type to filter commands:", id="filter-display")
103 |             yield ListView(id="command-list")
104 |     
105 |     def on_mount(self):
106 |         """Initialize the command palette"""
107 |         self._update_commands("")
108 |     
109 |     def show(self, filter_text: str = ""):
110 |         """Show the command palette with optional filter"""
111 |         self.current_filter = filter_text
112 |         self._update_commands(filter_text)
113 |         self.add_class("visible")
114 |         
115 |     
116 |     def hide(self):
117 |         """Hide the command palette"""
118 |         self.remove_class("visible")
119 |         self.current_filter = ""
120 |         self.selected_index = 0
121 |     
122 |     def update_filter(self, filter_text: str):
123 |         """Update the command filter"""
124 |         self.current_filter = filter_text
125 |         self._update_commands(filter_text)
126 |         
127 |         filter_display = self.query_one("#filter-display", Static)
128 |         if filter_text:
129 |             filter_display.update(f"Filter: {filter_text}")
130 |         else:
131 |             filter_display.update("Type to filter commands:")
132 |     
133 |     def _update_commands(self, filter_text: str):
134 |         """Update the displayed commands based on filter"""
135 |         if filter_text:
136 |             self.filtered_commands = [
137 |                 cmd for cmd in self.COMMANDS
138 |                 if cmd.command.lower().startswith(filter_text.lower())
139 |             ]
140 |         else:
141 |             self.filtered_commands = self.COMMANDS.copy()
142 |         
143 |         self.selected_index = 0
144 |         
145 |         list_view = self.query_one("#command-list", ListView)
146 |         list_view.clear()
147 |         
148 |         self.command_items = []
149 |         for i, cmd in enumerate(self.filtered_commands):
150 |             command_text = Text()
151 |             command_text.append(cmd.command, style="bold #ff99cc")
152 |             command_text.append(" - ", style="dim")
153 |             command_text.append(cmd.description, style="white")
154 |             
155 |             list_item = ListItem(Static(command_text), classes="command-item")
156 |             list_view.append(list_item)
157 |             self.command_items.append(list_item)
158 |         
159 |         if self.command_items and len(self.command_items) > 0:
160 |             self.selected_index = 0
161 |             self.command_items[0].add_class("command-item-selected")
162 |     
163 |     def _highlight_selected(self):
164 |         """Highlight the currently selected command"""
165 |         for item in self.command_items:
166 |             item.remove_class("command-item-selected")
167 |         
168 |         if 0 <= self.selected_index < len(self.command_items):
169 |             self.command_items[self.selected_index].add_class("command-item-selected")
170 |             
171 |             list_view = self.query_one("#command-list", ListView)
172 |             list_view.scroll_to_widget(self.command_items[self.selected_index])
173 |     
174 |     def move_selection_up(self):
175 |         """Move selection up"""
176 |         if self.filtered_commands:
177 |             self.selected_index = max(0, self.selected_index - 1)
178 |             self._highlight_selected()
179 |     
180 |     def move_selection_down(self):
181 |         """Move selection down"""
182 |         if self.filtered_commands:
183 |             self.selected_index = min(len(self.filtered_commands) - 1, self.selected_index + 1)
184 |             self._highlight_selected()
185 |     
186 |     def get_selected_command(self) -> Optional[CommandItem]:
187 |         """Get the currently selected command"""
188 |         if 0 <= self.selected_index < len(self.filtered_commands):
189 |             return self.filtered_commands[self.selected_index]
190 |         return None
191 |     
192 |     def select_current_command(self) -> Optional[str]:
193 |         """Select the current command and return its command string"""
194 |         selected = self.get_selected_command()
195 |         if selected:
196 |             self.hide()
197 |             return selected.command
198 |         return None
199 |     
200 | 
201 | 
class CommandSelected(Message):
    """Message carrying the command string that was chosen in the palette."""

    def __init__(self, command: str):
        super().__init__()
        # Command string of the chosen entry, e.g. "/new".
        self.command = command
208 | 
209 | 
class CommandAutoComplete(Message):
    """Message carrying the command string to auto-complete into the input."""

    def __init__(self, command: str):
        super().__init__()
        # Command string that should replace the typed text.
        self.command = command
216 | 


--------------------------------------------------------------------------------
/docpixie/processors/image.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Image processor for direct image files
  3 | Handles JPG, PNG, WebP, and other image formats
  4 | """
  5 | 
  6 | import asyncio
  7 | import logging
  8 | import tempfile
  9 | import os
 10 | from typing import List, Optional
 11 | from pathlib import Path
 12 | 
 13 | from PIL import Image
 14 | 
 15 | from .base import BaseProcessor, ProcessingError
 16 | from ..models.document import Document, Page, DocumentStatus
 17 | from ..core.config import DocPixieConfig
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | 
 22 | class ImageProcessor(BaseProcessor):
 23 |     """Processor for image files"""
 24 |     
 25 |     SUPPORTED_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tiff', '.tif']
 26 |     
 27 |     def __init__(self, config: DocPixieConfig):
 28 |         super().__init__(config)
 29 |         self.temp_dir = None
 30 |     
 31 |     def supports(self, file_path: str) -> bool:
 32 |         """Check if file is a supported image format"""
 33 |         return Path(file_path).suffix.lower() in self.SUPPORTED_EXTENSIONS
 34 |     
 35 |     def get_supported_extensions(self) -> List[str]:
 36 |         """Get supported file extensions"""
 37 |         return self.SUPPORTED_EXTENSIONS.copy()
 38 |     
 39 |     async def process(self, file_path: str, document_id: Optional[str] = None) -> Document:
 40 |         """
 41 |         Process image file into a single-page document
 42 |         
 43 |         Args:
 44 |             file_path: Path to image file
 45 |             document_id: Optional custom document ID
 46 |             
 47 |         Returns:
 48 |             Document with single page
 49 |         """
 50 |         self._validate_file(file_path)
 51 |         logger.info(f"Processing image: {file_path}")
 52 |         
 53 |         try:
 54 |             # Create temporary directory for processed image
 55 |             self.temp_dir = tempfile.mkdtemp(prefix="docpixie_img_")
 56 |             
 57 |             # Process image in thread pool
 58 |             page = await asyncio.get_event_loop().run_in_executor(
 59 |                 None,
 60 |                 self._process_image_sync,
 61 |                 file_path
 62 |             )
 63 |             
 64 |             # Create document with single page
 65 |             document = self._create_document(file_path, [page], document_id)
 66 |             document.status = DocumentStatus.COMPLETED
 67 |             
 68 |             # Update page with document info
 69 |             for page in document.pages:
 70 |                 page.document_name = document.name
 71 |                 page.document_id = document.id
 72 |             
 73 |             logger.info(f"Successfully processed image: {file_path}")
 74 |             return document
 75 |             
 76 |         except Exception as e:
 77 |             logger.error(f"Failed to process image {file_path}: {e}")
 78 |             # Clean up temp directory on error
 79 |             if self.temp_dir and os.path.exists(self.temp_dir):
 80 |                 import shutil
 81 |                 shutil.rmtree(self.temp_dir, ignore_errors=True)
 82 |             raise ProcessingError(f"Image processing failed: {e}", file_path)
 83 |     
 84 |     def _process_image_sync(self, file_path: str) -> Page:
 85 |         """Synchronous image processing"""
 86 |         try:
 87 |             # Open and process image
 88 |             with Image.open(file_path) as img:
 89 |                 # Get original dimensions
 90 |                 original_width, original_height = img.size
 91 |                 
 92 |                 # Optimize image
 93 |                 optimized_img = self._optimize_image(img)
 94 |                 
 95 |                 # Save optimized image
 96 |                 output_filename = "page_001.jpg"
 97 |                 output_path = os.path.join(self.temp_dir, output_filename)
 98 |                 
 99 |                 optimized_img.save(
100 |                     output_path,
101 |                     'JPEG',
102 |                     quality=self.config.jpeg_quality,
103 |                     optimize=True
104 |                 )
105 |                 
106 |                 # Get final image dimensions and file size
107 |                 final_width, final_height = optimized_img.size
108 |                 file_size = os.path.getsize(output_path)
109 |                 
110 |                 # Create page object
111 |                 page = Page(
112 |                     page_number=1,
113 |                     image_path=output_path,
114 |                     metadata={
115 |                         'original_width': original_width,
116 |                         'original_height': original_height,
117 |                         'final_width': final_width,
118 |                         'final_height': final_height,
119 |                         'file_size': file_size,
120 |                         'original_format': img.format
121 |                     }
122 |                 )
123 |                 
124 |                 return page
125 |                 
126 |         except Image.UnidentifiedImageError as e:
127 |             raise ProcessingError(f"Unrecognized image format: {e}", file_path)
128 |         except Exception as e:
129 |             raise ProcessingError(f"Failed to process image: {e}", file_path)
130 |     
131 |     def _optimize_image(self, img: Image.Image) -> Image.Image:
132 |         """
133 |         Optimize image for storage and processing
134 |         Same logic as PDF processor
135 |         """
136 |         # Convert to RGB if necessary
137 |         if img.mode in ('RGBA', 'LA', 'P'):
138 |             # Create white background for transparency
139 |             rgb_img = Image.new('RGB', img.size, (255, 255, 255))
140 |             if img.mode == 'RGBA':
141 |                 rgb_img.paste(img, mask=img.split()[-1])  # Use alpha channel as mask
142 |             elif img.mode == 'P' and 'transparency' in img.info:
143 |                 # Handle palette mode with transparency
144 |                 img = img.convert('RGBA')
145 |                 rgb_img.paste(img, mask=img.split()[-1])
146 |             else:
147 |                 rgb_img.paste(img)
148 |             img = rgb_img
149 |         elif img.mode != 'RGB':
150 |             img = img.convert('RGB')
151 |         
152 |         # Resize if image is too large
153 |         max_width, max_height = self.config.pdf_max_image_size
154 |         if img.width > max_width or img.height > max_height:
155 |             # Calculate new size maintaining aspect ratio
156 |             ratio = min(max_width / img.width, max_height / img.height)
157 |             new_width = int(img.width * ratio)
158 |             new_height = int(img.height * ratio)
159 |             
160 |             img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
161 |             logger.debug(f"Resized image to {new_width}x{new_height}")
162 |         
163 |         return img
164 |     
165 |     def create_thumbnail(self, image_path: str) -> str:
166 |         """Create thumbnail for quick page selection"""
167 |         try:
168 |             with Image.open(image_path) as img:
169 |                 # Create thumbnail
170 |                 thumbnail = img.copy()
171 |                 thumbnail.thumbnail(self.config.thumbnail_size, Image.Resampling.LANCZOS)
172 |                 
173 |                 # Save thumbnail
174 |                 thumb_path = image_path.replace('.jpg', '_thumb.jpg')
175 |                 thumbnail.save(thumb_path, 'JPEG', quality=85, optimize=True)
176 |                 
177 |                 return thumb_path
178 |                 
179 |         except Exception as e:
180 |             logger.error(f"Failed to create thumbnail for {image_path}: {e}")
181 |             return image_path  # Return original if thumbnail creation fails
182 |     
183 |     def get_image_metadata(self, file_path: str) -> dict:
184 |         """Extract image metadata"""
185 |         try:
186 |             with Image.open(file_path) as img:
187 |                 metadata = {
188 |                     'format': img.format,
189 |                     'mode': img.mode,
190 |                     'width': img.width,
191 |                     'height': img.height,
192 |                     'has_transparency': img.mode in ('RGBA', 'LA') or 'transparency' in img.info
193 |                 }
194 |                 
195 |                 # Add EXIF data if available
196 |                 if hasattr(img, '_getexif') and img._getexif() is not None:
197 |                     exif = img._getexif()
198 |                     metadata['exif'] = exif
199 |                 
200 |                 return metadata
201 |                 
202 |         except Exception as e:
203 |             logger.error(f"Failed to extract image metadata: {e}")
204 |             return {}


--------------------------------------------------------------------------------
/docpixie/cli/event_handlers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Event handling mixins for DocPixie CLI
  3 | """
  4 | 
  5 | from typing import TYPE_CHECKING
  6 | from textual import events
  7 | from textual.widgets import TextArea, Label
  8 | from .widgets import (
  9 |     CommandPalette, CommandSelected, CommandAutoComplete,
 10 |     ConversationSelected, ConversationDeleted,
 11 |     ModelSelected, DocumentRemoved, DocumentsIndexed,
 12 |     ChatArea
 13 | )
 14 | 
 15 | if TYPE_CHECKING:
 16 |     from .app import DocPixieTUI
 17 | 
 18 | 
 19 | class CommandEventMixin:
 20 |     """Handles command palette and text input events"""
 21 | 
 22 |     async def on_text_area_changed(self: 'DocPixieTUI', event: TextArea.Changed) -> None:
 23 |         """Handle text area changes for command palette"""
 24 |         if event.text_area.id != "chat-input":
 25 |             return
 26 | 
 27 |         lines = event.text_area.text.split('\\n')
 28 |         if lines:
 29 |             current_line = lines[-1] if lines else ""
 30 | 
 31 |             if current_line.startswith("/"):
 32 |                 command_palette = self.query_one("#command-palette", CommandPalette)
 33 |                 if not self.state_manager.command_palette_active:
 34 |                     self.state_manager.command_palette_active = True
 35 |                     command_palette.show(current_line)
 36 |                 else:
 37 |                     command_palette.update_filter(current_line)
 38 |             else:
 39 |                 if self.state_manager.command_palette_active:
 40 |                     command_palette = self.query_one("#command-palette", CommandPalette)
 41 |                     command_palette.hide()
 42 |                     self.state_manager.command_palette_active = False
 43 | 
 44 |     async def on_key(self: 'DocPixieTUI', event: events.Key) -> None:
 45 |         """Handle key events for command palette navigation"""
 46 |         if self.state_manager.command_palette_active:
 47 |             command_palette = self.query_one("#command-palette", CommandPalette)
 48 | 
 49 |             if event.key == "escape":
 50 |                 command_palette.hide()
 51 |                 self.state_manager.command_palette_active = False
 52 |                 text_area = self.query_one("#chat-input")
 53 |                 text_area.clear()
 54 |                 event.prevent_default()
 55 | 
 56 |             elif event.key == "up":
 57 |                 command_palette.move_selection_up()
 58 |                 event.prevent_default()
 59 | 
 60 |             elif event.key == "down":
 61 |                 command_palette.move_selection_down()
 62 |                 event.prevent_default()
 63 | 
 64 |             elif event.key == "tab":
 65 |                 selected = command_palette.get_selected_command()
 66 |                 if selected:
 67 |                     text_area = self.query_one("#chat-input")
 68 |                     text_area.text = selected.command
 69 |                     text_area.cursor_location = (0, len(selected.command))
 70 |                 event.prevent_default()
 71 | 
 72 |     async def on_command_selected(self: 'DocPixieTUI', event: CommandSelected) -> None:
 73 |         """Handle command selection from palette"""
 74 |         command_palette = self.query_one("#command-palette", CommandPalette)
 75 |         command_palette.hide()
 76 |         self.state_manager.command_palette_active = False
 77 | 
 78 |         text_area = self.query_one("#chat-input")
 79 |         text_area.clear()
 80 | 
 81 |         await self.handle_command(event.command)
 82 | 
 83 |     async def on_command_auto_complete(self: 'DocPixieTUI', event: CommandAutoComplete) -> None:
 84 |         """Handle command auto-completion"""
 85 |         text_area = self.query_one("#chat-input")
 86 |         text_area.text = event.command
 87 |         text_area.cursor_location = (0, len(event.command))
 88 | 
 89 | 
 90 | class ConversationEventMixin:
 91 |     """Handles conversation-related events"""
 92 | 
 93 |     async def on_conversation_selected(self: 'DocPixieTUI', event: ConversationSelected) -> None:
 94 |         """Handle conversation selection from dialog"""
 95 |         chat_log = self.query_one("#chat-log", ChatArea)
 96 | 
 97 |         if event.conversation_id == "new":
 98 |             await self.handle_command("/new")
 99 |             return
100 | 
101 |         try:
102 |             self.state_manager.save_current_conversation()
103 | 
104 |             if self.state_manager.load_conversation(event.conversation_id):
105 |                 conversations = self.state_manager.conversation_storage.list_local_conversations()
106 |                 metadata = next(
107 |                     (conv for conv in conversations if conv.id == event.conversation_id),
108 |                     None
109 |                 )
110 | 
111 |                 chat_log.clear()
112 | 
113 |                 for msg in self.state_manager.conversation_history:
114 |                     if msg.role == "user":
115 |                         chat_log.add_user_message(msg.content)
116 |                     else:
117 |                         chat_log.add_assistant_message(msg.content)
118 | 
119 |                 status_label = self.query_one("#status-label", Label)
120 |                 status_label.update(self.state_manager.get_status_text())
121 | 
122 |                 conv_name = metadata.name if metadata else "Unknown"
123 |                 chat_log.write(f"[green bold]●[/green bold] Loaded conversation: {conv_name}\n\n")
124 |             else:
125 |                 chat_log.write("[red bold]●[/red bold] Failed to load conversation\n\n")
126 | 
127 |         except Exception as e:
128 |             chat_log.write(f"[red bold]●[/red bold] Error loading conversation: {e}\n\n")
129 | 
130 |     async def on_conversation_deleted(self: 'DocPixieTUI', event: ConversationDeleted) -> None:
131 |         """Handle conversation deletion"""
132 |         chat_log = self.query_one("#chat-log", ChatArea)
133 |         chat_log.write("[green bold]●[/green bold] Conversation deleted\n\n")
134 | 
135 | 
class ModelEventMixin:
    """Handles model selection events"""

    async def on_model_selected(self: 'DocPixieTUI', event: ModelSelected) -> None:
        """Handle model selection.

        Writes one line per model that actually changed (text and/or vision),
        asks the DocPixie manager to switch models once if anything changed,
        and refreshes the status label. A model counts as changed only when
        its previous value is set and differs from the new one.
        """
        chat_log = self.query_one("#chat-log", ChatArea)

        text_changed = bool(event.old_text_model) and event.text_model != event.old_text_model
        vision_changed = bool(event.old_vision_model) and event.vision_model != event.old_vision_model

        # Report each change on its own: an if/elif chain dropped the vision
        # message whenever both models were changed in the same event.
        if text_changed:
            chat_log.write(f"[green bold]●[/green bold] Action model switched to {event.text_model}\n\n")
        if vision_changed:
            chat_log.write(f"[green bold]●[/green bold] Vision model switched to {event.vision_model}\n\n")

        if text_changed or vision_changed:
            await self.docpixie_manager.switch_models()
        else:
            chat_log.write("[dim]No model changes made[/dim]\n\n")

        status_label = self.query_one("#status-label", Label)
        status_label.update(self.state_manager.get_status_text())
154 | 
155 | 
class DocumentEventMixin:
    """Reacts to document removal and indexing events."""

    async def on_document_removed(self: 'DocPixieTUI', event: DocumentRemoved) -> None:
        """Remove the event's documents from the state and, if possible, from storage.

        Each id that the state manager actually removed is counted. When
        ``self.docpixie`` is set, the document is also deleted through the
        DocPixie manager; failures there are written to the chat log and do
        not stop the loop.
        """
        chat_log = self.query_one("#chat-log", ChatArea)

        removed_count = 0
        for doc_id in event.document_ids:
            if not self.state_manager.remove_document(doc_id):
                continue
            removed_count += 1

            if not self.docpixie:
                continue

            try:
                if not self.docpixie_manager.delete_document_sync(doc_id):
                    doc_name = f"Document {doc_id}"  # Fallback name
                    chat_log.write(f"[warning]Warning: Could not delete {doc_name} from storage[/warning]\n")
            except Exception as e:
                doc_name = f"Document {doc_id}"  # Fallback name
                chat_log.write(f"[error]Error deleting {doc_name}: {e}[/error]\n")

        if removed_count == 1:
            summary = "[green bold]●[/green bold] Removed 1 document from index\n\n"
        else:
            summary = f"[green bold]●[/green bold] Removed {removed_count} documents from index\n\n"
        chat_log.write(summary)

        self.query_one("#status-label", Label).update(
            self.state_manager.get_status_text()
        )

    async def on_documents_indexed(self: 'DocPixieTUI', event: DocumentsIndexed) -> None:
        """Add newly indexed documents to the state and report how many were new."""
        chat_log = self.query_one("#chat-log", ChatArea)

        indexed_count = 0
        for doc in event.documents:
            # Count a document only when no tracked document shares its id.
            is_new = all(
                existing.id != doc.id
                for existing in self.state_manager.indexed_documents
            )
            if is_new:
                self.state_manager.add_document(doc)
                indexed_count += 1

        if indexed_count == 1:
            summary = "[green bold]●[/green bold] Successfully indexed 1 document\n\n"
        else:
            summary = f"[green bold]●[/green bold] Successfully indexed {indexed_count} documents\n\n"
        chat_log.write(summary)

        self.query_one("#status-label", Label).update(
            self.state_manager.get_status_text()
        )
203 | 


--------------------------------------------------------------------------------
/docpixie/processors/pdf.py:
--------------------------------------------------------------------------------
  1 | """
  2 | PyMuPDF-based PDF processor
  3 | Replacement for pdf2image with better performance and quality
  4 | """
  5 | 
  6 | import asyncio
  7 | import logging
  8 | from typing import List, Optional, Tuple
  9 | from pathlib import Path
 10 | import tempfile
 11 | import os
 12 | 
 13 | from PIL import Image
 14 | import fitz  # PyMuPDF
 15 | 
 16 | from .base import BaseProcessor, ProcessingError
 17 | from ..models.document import Document, Page, DocumentStatus
 18 | from ..core.config import DocPixieConfig
 19 | 
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | class PDFProcessor(BaseProcessor):
 24 |     """PDF processor using PyMuPDF for better performance"""
 25 |     
 26 |     SUPPORTED_EXTENSIONS = ['.pdf']
 27 |     
 28 |     def __init__(self, config: DocPixieConfig):
 29 |         super().__init__(config)
 30 |         self.temp_dir = None
 31 |     
 32 |     def supports(self, file_path: str) -> bool:
 33 |         """Check if file is a PDF"""
 34 |         return Path(file_path).suffix.lower() in self.SUPPORTED_EXTENSIONS
 35 |     
 36 |     def get_supported_extensions(self) -> List[str]:
 37 |         """Get supported file extensions"""
 38 |         return self.SUPPORTED_EXTENSIONS.copy()
 39 |     
 40 |     async def process(self, file_path: str, document_id: Optional[str] = None) -> Document:
 41 |         """
 42 |         Process PDF into document pages using PyMuPDF
 43 |         
 44 |         Args:
 45 |             file_path: Path to PDF file
 46 |             document_id: Optional custom document ID
 47 |             
 48 |         Returns:
 49 |             Document with processed pages
 50 |         """
 51 |         self._validate_file(file_path)
 52 |         logger.info(f"Processing PDF: {file_path}")
 53 |         
 54 |         try:
 55 |             # Create temporary directory for page images
 56 |             self.temp_dir = tempfile.mkdtemp(prefix="docpixie_pdf_")
 57 |             
 58 |             # Process PDF in thread pool (PyMuPDF is not async)
 59 |             pages = await asyncio.get_event_loop().run_in_executor(
 60 |                 None, 
 61 |                 self._process_pdf_sync,
 62 |                 file_path
 63 |             )
 64 |             
 65 |             # Create document
 66 |             document = self._create_document(file_path, pages, document_id)
 67 |             document.status = DocumentStatus.COMPLETED
 68 |             
 69 |             # Update pages with document info
 70 |             for page in document.pages:
 71 |                 page.document_name = document.name
 72 |                 page.document_id = document.id
 73 |             
 74 |             logger.info(f"Successfully processed PDF: {len(pages)} pages")
 75 |             return document
 76 |             
 77 |         except Exception as e:
 78 |             logger.error(f"Failed to process PDF {file_path}: {e}")
 79 |             # Clean up temp directory on error
 80 |             if self.temp_dir and os.path.exists(self.temp_dir):
 81 |                 import shutil
 82 |                 shutil.rmtree(self.temp_dir, ignore_errors=True)
 83 |             raise ProcessingError(f"PDF processing failed: {e}", file_path)
 84 |     
 85 |     def _process_pdf_sync(self, file_path: str) -> List[Page]:
 86 |         """Synchronous PDF processing with PyMuPDF"""
 87 |         pages = []
 88 |         
 89 |         try:
 90 |             # Open PDF document
 91 |             pdf_doc = fitz.open(file_path)
 92 |             total_pages = pdf_doc.page_count
 93 |             
 94 |             logger.info(f"Processing {total_pages} pages from PDF")
 95 |             
 96 |             for page_num in range(total_pages):
 97 |                 try:
 98 |                     # Get page
 99 |                     page = pdf_doc[page_num]
100 |                     
101 |                     # Create transformation matrix for scaling
102 |                     matrix = fitz.Matrix(
103 |                         self.config.pdf_render_scale, 
104 |                         self.config.pdf_render_scale
105 |                     )
106 |                     
107 |                     # Render page to pixmap
108 |                     pix = page.get_pixmap(
109 |                         matrix=matrix,
110 |                         alpha=False  # No transparency for JPEG
111 |                     )
112 |                     
113 |                     # Convert to PIL Image
114 |                     img_data = pix.tobytes("ppm")
115 |                     img = Image.open(io.BytesIO(img_data))
116 |                     
117 |                     # Optimize image
118 |                     optimized_img = self._optimize_image(img)
119 |                     
120 |                     # Save page image
121 |                     page_filename = f"page_{page_num + 1:03d}.jpg"
122 |                     page_image_path = os.path.join(self.temp_dir, page_filename)
123 |                     
124 |                     optimized_img.save(
125 |                         page_image_path, 
126 |                         'JPEG', 
127 |                         quality=self.config.jpeg_quality,
128 |                         optimize=True
129 |                     )
130 |                     
131 |                     # Create page object
132 |                     page_obj = Page(
133 |                         page_number=page_num + 1,
134 |                         image_path=page_image_path,
135 |                         metadata={
136 |                             'width': pix.width,
137 |                             'height': pix.height,
138 |                             'file_size': os.path.getsize(page_image_path)
139 |                         }
140 |                     )
141 |                     
142 |                     pages.append(page_obj)
143 |                     
144 |                 except Exception as e:
145 |                     logger.error(f"Failed to process page {page_num + 1}: {e}")
146 |                     raise ProcessingError(
147 |                         f"Failed to process page {page_num + 1}: {e}",
148 |                         file_path,
149 |                         page_num + 1
150 |                     )
151 |             
152 |             pdf_doc.close()
153 |             return pages
154 |             
155 |         except fitz.FileDataError as e:
156 |             raise ProcessingError(f"Invalid PDF file: {e}", file_path)
157 |         except fitz.FileNotFoundError as e:
158 |             raise ProcessingError(f"PDF file not found: {e}", file_path)
159 |         except Exception as e:
160 |             raise ProcessingError(f"Unexpected error processing PDF: {e}", file_path)
161 |     
162 |     def _optimize_image(self, img: Image.Image) -> Image.Image:
163 |         """
164 |         Optimize image for storage and processing
165 |         Adapted from existing resize_image_for_upload logic
166 |         """
167 |         # Convert to RGB if necessary
168 |         if img.mode in ('RGBA', 'LA', 'P'):
169 |             # Create white background
170 |             rgb_img = Image.new('RGB', img.size, (255, 255, 255))
171 |             if img.mode == 'RGBA':
172 |                 rgb_img.paste(img, mask=img.split()[-1])  # Use alpha channel as mask
173 |             else:
174 |                 rgb_img.paste(img)
175 |             img = rgb_img
176 |         elif img.mode != 'RGB':
177 |             img = img.convert('RGB')
178 |         
179 |         # Resize if image is too large
180 |         max_width, max_height = self.config.pdf_max_image_size
181 |         if img.width > max_width or img.height > max_height:
182 |             # Calculate new size maintaining aspect ratio
183 |             ratio = min(max_width / img.width, max_height / img.height)
184 |             new_width = int(img.width * ratio)
185 |             new_height = int(img.height * ratio)
186 |             
187 |             img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
188 |             logger.debug(f"Resized image to {new_width}x{new_height}")
189 |         
190 |         return img
191 |     
192 |     def create_thumbnail(self, image_path: str) -> str:
193 |         """Create thumbnail for quick page selection"""
194 |         try:
195 |             with Image.open(image_path) as img:
196 |                 # Create thumbnail
197 |                 thumbnail = img.copy()
198 |                 thumbnail.thumbnail(self.config.thumbnail_size, Image.Resampling.LANCZOS)
199 |                 
200 |                 # Save thumbnail
201 |                 thumb_path = image_path.replace('.jpg', '_thumb.jpg')
202 |                 thumbnail.save(thumb_path, 'JPEG', quality=85, optimize=True)
203 |                 
204 |                 return thumb_path
205 |                 
206 |         except Exception as e:
207 |             logger.error(f"Failed to create thumbnail for {image_path}: {e}")
208 |             return image_path  # Return original if thumbnail creation fails
209 |     
210 |     def get_pdf_metadata(self, file_path: str) -> dict:
211 |         """Extract PDF metadata"""
212 |         try:
213 |             pdf_doc = fitz.open(file_path)
214 |             metadata = pdf_doc.metadata
215 |             page_count = pdf_doc.page_count
216 |             pdf_doc.close()
217 |             
218 |             return {
219 |                 'title': metadata.get('title', ''),
220 |                 'author': metadata.get('author', ''),
221 |                 'subject': metadata.get('subject', ''),
222 |                 'creator': metadata.get('creator', ''),
223 |                 'producer': metadata.get('producer', ''),
224 |                 'creation_date': metadata.get('creationDate', ''),
225 |                 'modification_date': metadata.get('modDate', ''),
226 |                 'page_count': page_count
227 |             }
228 |         except Exception as e:
229 |             logger.error(f"Failed to extract PDF metadata: {e}")
230 |             return {}
231 | 
232 | 
233 | # Import io for BytesIO
234 | import io


--------------------------------------------------------------------------------
/docpixie/storage/memory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | In-memory storage backend for testing
  3 | """
  4 | 
  5 | import asyncio
  6 | from typing import List, Dict, Any, Optional
  7 | from datetime import datetime
  8 | import logging
  9 | import copy
 10 | 
 11 | from .base import BaseStorage, StorageError
 12 | from ..models.document import Document, Page
 13 | from ..core.config import DocPixieConfig
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class InMemoryStorage(BaseStorage):
 19 |     """In-memory storage backend for testing and development"""
 20 |     
 21 |     def __init__(self, config: DocPixieConfig):
 22 |         self.config = config
 23 |         self._documents: Dict[str, Document] = {}
 24 |         self._document_summaries: Dict[str, str] = {}
 25 |         self._created_at = datetime.now()
 26 |         logger.info("Initialized in-memory storage")
 27 |     
 28 |     async def save_document(self, document: Document) -> str:
 29 |         """Save document to memory"""
 30 |         try:
 31 |             # Deep copy to avoid external modifications
 32 |             stored_document = copy.deepcopy(document)
 33 |             
 34 |             # Store document
 35 |             self._documents[document.id] = stored_document
 36 |             
 37 |             # Store summary separately for quick access
 38 |             if document.summary:
 39 |                 self._document_summaries[document.id] = document.summary
 40 |             
 41 |             logger.info(f"Saved document {document.id} to memory ({len(document.pages)} pages)")
 42 |             return document.id
 43 |             
 44 |         except Exception as e:
 45 |             logger.error(f"Failed to save document {document.id} to memory: {e}")
 46 |             raise StorageError(f"Failed to save document: {e}", document.id)
 47 |     
 48 |     async def get_document(self, document_id: str) -> Optional[Document]:
 49 |         """Retrieve document from memory"""
 50 |         try:
 51 |             document = self._documents.get(document_id)
 52 |             if document:
 53 |                 # Return a deep copy to avoid external modifications
 54 |                 return copy.deepcopy(document)
 55 |             return None
 56 |             
 57 |         except Exception as e:
 58 |             logger.error(f"Failed to get document {document_id} from memory: {e}")
 59 |             raise StorageError(f"Failed to get document: {e}", document_id)
 60 |     
 61 |     async def list_documents(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
 62 |         """List all documents in memory"""
 63 |         try:
 64 |             documents = []
 65 |             
 66 |             for doc_id, document in self._documents.items():
 67 |                 doc_info = {
 68 |                     'id': document.id,
 69 |                     'name': document.name,
 70 |                     'summary': self._document_summaries.get(doc_id),
 71 |                     'page_count': len(document.pages),
 72 |                     'created_at': document.created_at.isoformat(),
 73 |                     'updated_at': document.created_at.isoformat(),  # No update tracking in memory
 74 |                     'status': document.status.value
 75 |                 }
 76 |                 documents.append(doc_info)
 77 |                 
 78 |                 if limit and len(documents) >= limit:
 79 |                     break
 80 |             
 81 |             # Sort by creation time (newest first)
 82 |             documents.sort(key=lambda x: x['created_at'], reverse=True)
 83 |             return documents
 84 |             
 85 |         except Exception as e:
 86 |             logger.error(f"Failed to list documents in memory: {e}")
 87 |             raise StorageError(f"Failed to list documents: {e}")
 88 |     
 89 |     async def delete_document(self, document_id: str) -> bool:
 90 |         """Delete document from memory"""
 91 |         try:
 92 |             if document_id in self._documents:
 93 |                 del self._documents[document_id]
 94 |                 self._document_summaries.pop(document_id, None)
 95 |                 logger.info(f"Deleted document {document_id} from memory")
 96 |                 return True
 97 |             else:
 98 |                 logger.warning(f"Document {document_id} not found in memory")
 99 |                 return False
100 |                 
101 |         except Exception as e:
102 |             logger.error(f"Failed to delete document {document_id} from memory: {e}")
103 |             raise StorageError(f"Failed to delete document: {e}", document_id)
104 |     
105 |     async def document_exists(self, document_id: str) -> bool:
106 |         """Check if document exists in memory"""
107 |         return document_id in self._documents
108 |     
109 |     async def get_document_summary(self, document_id: str) -> Optional[str]:
110 |         """Get document summary from memory"""
111 |         return self._document_summaries.get(document_id)
112 |     
113 |     async def update_document_summary(self, document_id: str, summary: str) -> bool:
114 |         """Update document summary in memory"""
115 |         try:
116 |             if document_id in self._documents:
117 |                 # Update summary in both document and summary cache
118 |                 self._documents[document_id].summary = summary
119 |                 self._document_summaries[document_id] = summary
120 |                 logger.info(f"Updated summary for document {document_id} in memory")
121 |                 return True
122 |             else:
123 |                 logger.warning(f"Document {document_id} not found for summary update")
124 |                 return False
125 |                 
126 |         except Exception as e:
127 |             logger.error(f"Failed to update summary for {document_id} in memory: {e}")
128 |             return False
129 |     
130 |     async def search_documents(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
131 |         """Search documents in memory"""
132 |         try:
133 |             matching_docs = []
134 |             query_lower = query.lower()
135 |             
136 |             for doc_id, document in self._documents.items():
137 |                 # Check name match
138 |                 name_match = query_lower in document.name.lower()
139 |                 
140 |                 # Check summary match
141 |                 summary = self._document_summaries.get(doc_id, '')
142 |                 summary_match = query_lower in summary.lower()
143 |                 
144 |                 if name_match or summary_match:
145 |                     doc_info = {
146 |                         'id': document.id,
147 |                         'name': document.name,
148 |                         'summary': summary,
149 |                         'page_count': len(document.pages),
150 |                         'created_at': document.created_at.isoformat(),
151 |                         'status': document.status.value,
152 |                         'relevance_score': self._calculate_relevance(
153 |                             query_lower, document, summary
154 |                         )
155 |                     }
156 |                     matching_docs.append(doc_info)
157 |                     
158 |                     if len(matching_docs) >= limit:
159 |                         break
160 |             
161 |             # Sort by relevance score
162 |             matching_docs.sort(key=lambda x: x['relevance_score'], reverse=True)
163 |             return matching_docs
164 |             
165 |         except Exception as e:
166 |             logger.error(f"Failed to search documents in memory: {e}")
167 |             return []
168 |     
169 |     def _calculate_relevance(self, query: str, document: Document, summary: str) -> float:
170 |         """Calculate simple relevance score for search results"""
171 |         score = 0.0
172 |         
173 |         # Name matches are highly relevant
174 |         if query in document.name.lower():
175 |             score += 10.0
176 |         
177 |         # Summary matches are relevant
178 |         summary_matches = summary.lower().count(query)
179 |         score += summary_matches * 2.0
180 |         
181 |         
182 |         return score
183 |     
184 |     async def get_all_documents(self) -> List[Document]:
185 |         """Get all documents for agent processing"""
186 |         return list(self._documents.values())
187 |     
188 |     async def get_all_pages(self) -> List[Page]:
189 |         """Get all pages from all documents for agent processing"""
190 |         all_pages = []
191 |         for document in self._documents.values():
192 |             if document.pages:
193 |                 all_pages.extend(document.pages)
194 |         return all_pages
195 |     
196 |     def get_storage_stats(self) -> Dict[str, Any]:
197 |         """Get storage statistics"""
198 |         try:
199 |             total_pages = sum(len(doc.pages) for doc in self._documents.values())
200 |             
201 |             return {
202 |                 'backend': 'InMemoryStorage',
203 |                 'total_documents': len(self._documents),
204 |                 'total_pages': total_pages,
205 |                 'created_at': self._created_at.isoformat(),
206 |                 'features': ['in_memory', 'fast_access', 'search', 'testing']
207 |             }
208 |             
209 |         except Exception as e:
210 |             return {
211 |                 'backend': 'InMemoryStorage',
212 |                 'error': str(e)
213 |             }
214 |     
215 |     def clear_all(self):
216 |         """Clear all documents (useful for testing)"""
217 |         self._documents.clear()
218 |         self._document_summaries.clear()
219 |         logger.info("Cleared all documents from memory")
220 |     
221 |     def get_document_count(self) -> int:
222 |         """Get total number of documents in memory"""
223 |         return len(self._documents)
224 |     
225 |     def get_total_pages(self) -> int:
226 |         """Get total number of pages across all documents"""
227 |         return sum(len(doc.pages) for doc in self._documents.values())


--------------------------------------------------------------------------------
/docpixie/cli/docpixie_manager.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DocPixie integration manager for CLI application
  3 | """
  4 | 
  5 | import asyncio
  6 | from typing import TYPE_CHECKING, Optional, Any, Callable
  7 | from pathlib import Path
  8 | 
  9 | from docpixie import DocPixie, ConversationMessage
 10 | from docpixie.core.config import DocPixieConfig
 11 | from docpixie.models.document import Document
 12 | 
 13 | from .config import get_config_manager
 14 | from .state_manager import AppStateManager
 15 | from .widgets import ChatArea, DocumentManagerDialog
 16 | 
 17 | if TYPE_CHECKING:
 18 |     from .app import DocPixieTUI
 19 | 
 20 | 
 21 | class DocPixieManager:
 22 |     """Manages DocPixie instance and all related operations"""
 23 | 
 24 |     def __init__(self, app: 'DocPixieTUI', state_manager: AppStateManager):
 25 |         self.app = app
 26 |         self.state_manager = state_manager
 27 |         self.config_manager = get_config_manager()
 28 |         self.docpixie: Optional[DocPixie] = None
 29 | 
 30 |     async def create_docpixie_instance(self) -> bool:
 31 |         try:
 32 |             api_key = self.config_manager.get_api_key()
 33 |             if not api_key:
 34 |                 return False
 35 | 
 36 |             text_model, vision_model = self.config_manager.get_models()
 37 | 
 38 |             config = DocPixieConfig(
 39 |                 provider="openrouter",
 40 |                 model=text_model,
 41 |                 vision_model=vision_model,
 42 |                 storage_type="local",
 43 |                 local_storage_path="./.docpixie/documents",
 44 |                 openrouter_api_key=api_key,
 45 |                 jpeg_quality=85,
 46 |                 max_pages_per_task=4
 47 |             )
 48 | 
 49 |             self.docpixie = DocPixie(config=config)
 50 |             self.app.docpixie = self.docpixie
 51 |             return True
 52 | 
 53 |         except Exception as e:
 54 |             try:
 55 |                 chat_log = self.app.query_one("#chat-log", ChatArea)
 56 |                 chat_log.write(f"[error]❌ Failed to create DocPixie instance: {e}[/error]")
 57 |             except:
 58 |                 pass
 59 |             return False
 60 | 
 61 |     async def initialize_docpixie(self, show_welcome: bool = True) -> None:
 62 |         chat_log = self.app.query_one("#chat-log", ChatArea)
 63 | 
 64 |         if not await self.create_docpixie_instance():
 65 |             chat_log.write("[error]❌ No API key configured. Please restart and configure.[/error]")
 66 |             return
 67 | 
 68 |         try:
 69 |             await self.check_and_prompt_for_documents()
 70 |             await self.load_or_create_conversation()
 71 | 
 72 |             if show_welcome:
 73 |                 self.app.show_welcome_message()
 74 | 
 75 |             if self.state_manager.current_conversation_id and self.state_manager.conversation_history:
 76 |                 chat_log.add_static_text("[dim]━━━ Restored previous conversation ━━━[/dim]\n\n")
 77 | 
 78 |                 for msg in self.state_manager.conversation_history:
 79 |                     if msg.role == "user":
 80 |                         chat_log.add_user_message(msg.content)
 81 |                     else:
 82 |                         chat_log.add_assistant_message(msg.content)
 83 | 
 84 |                 chat_log.add_static_text("[dim]━━━ Continue your conversation below ━━━[/dim]\n\n")
 85 | 
 86 |         except Exception as e:
 87 |             chat_log.write(f"[error]❌ Failed to initialize: {e}[/error]")
 88 | 
 89 |     async def switch_models(self) -> None:
 90 |         await self.create_docpixie_instance()
 91 | 
 92 |     async def check_and_prompt_for_documents(self) -> None:
 93 |         chat_log = self.app.query_one("#chat-log", ChatArea)
 94 | 
 95 |         if not self.state_manager.documents_folder.exists():
 96 |             self.state_manager.documents_folder.mkdir(parents=True)
 97 |             chat_log.write(f"[green bold]●[/green bold] Created documents folder: {self.state_manager.documents_folder.absolute()}\n")
 98 |             chat_log.write("[blue bold]●[/blue bold] Add PDF files to the ./documents folder or use /documents to manage them.\n")
 99 |             # Auto-open the Document Manager when the folder is first created
100 |             await self.app.push_screen(DocumentManagerDialog(
101 |                 self.state_manager.documents_folder,
102 |                 self.docpixie
103 |             ))
104 |             return
105 | 
106 |         self.state_manager.clear_documents()
107 | 
108 |         try:
109 |             existing_docs = await self.docpixie.list_documents()
110 |             indexed_names = {doc['name'] for doc in existing_docs}
111 | 
112 |             for doc_meta in existing_docs:
113 |                 doc = await self.docpixie.get_document(doc_meta['id'])
114 |                 if doc:
115 |                     self.state_manager.add_document(doc)
116 | 
117 |         except Exception as e:
118 |             indexed_names = set()
119 |             chat_log.write(f"[dim]Note: Could not load existing documents: {e}[/dim]\\n")
120 | 
121 |         pdf_files = list(self.state_manager.documents_folder.glob("*.pdf"))
122 | 
123 |         if not pdf_files:
124 |             # Auto-open the Document Manager when there are no PDFs yet
125 |             await self.app.push_screen(DocumentManagerDialog(
126 |                 self.state_manager.documents_folder,
127 |                 self.docpixie
128 |             ))
129 |             return
130 | 
131 |         new_pdf_files = [
132 |             pdf for pdf in pdf_files
133 |             if pdf.stem not in indexed_names
134 |         ]
135 | 
136 |         if new_pdf_files:
137 |             chat_log.write(f"[blue bold]●[/blue bold] Found {len(new_pdf_files)} new PDF file(s)\n")
138 |             await self.app.push_screen(DocumentManagerDialog(
139 |                 self.state_manager.documents_folder,
140 |                 self.docpixie
141 |             ))
142 | 
143 |     async def load_or_create_conversation(self) -> None:
144 |         try:
145 |             doc_ids = [doc.id for doc in self.state_manager.indexed_documents]
146 |             last_conversation_id = self.state_manager.get_last_conversation_id()
147 | 
148 |             if last_conversation_id:
149 |                 if self.state_manager.load_conversation(last_conversation_id):
150 |                     status_label = self.app.query_one("#status-label")
151 |                     status_label.update(self.state_manager.get_status_text())
152 |                     return
153 | 
154 |             self.state_manager.create_new_conversation()
155 |             status_label = self.app.query_one("#status-label")
156 |             status_label.update(self.state_manager.get_status_text())
157 | 
158 |         except Exception as e:
159 |             print(f"Error loading conversation: {e}")
160 |             self.state_manager.set_current_conversation(None)
161 | 
162 |     async def process_query(self, query: str, task_callback: Optional[Callable] = None) -> None:
163 |         chat_log = self.app.query_one("#chat-log", ChatArea)
164 | 
165 |         if not self.docpixie:
166 |             chat_log.write("[error]❌ DocPixie not initialized[/error]\\n")
167 |             return
168 | 
169 |         if not self.state_manager.has_documents():
170 |             chat_log.write("[warning]⚠️ No documents indexed yet. Use /documents to add and index documents first.[/warning]\\n")
171 |             return
172 | 
173 |         self.state_manager.set_processing(True)
174 | 
175 |         try:
176 |             chat_log.show_processing_status()
177 | 
178 |             result = await asyncio.get_event_loop().run_in_executor(
179 |                 None,
180 |                 self.docpixie.query_sync,
181 |                 query,
182 |                 None,  # mode
183 |                 None,  # document_ids
184 |                 None,  # max_pages
185 |                 self.state_manager.conversation_history,
186 |                 task_callback
187 |             )
188 | 
189 |             chat_log.add_assistant_message(result.answer)
190 | 
191 |             if hasattr(result, 'get_pages_by_document'):
192 |                 pages_by_doc = result.get_pages_by_document()
193 |                 if pages_by_doc:
194 |                     chat_log.write("[dim]Analyzed documents:[/dim]\n")
195 |                     for doc_name, page_nums in pages_by_doc.items():
196 |                         pages_str = ", ".join(str(p) for p in page_nums)
197 |                         chat_log.write(f"[dim]  • {doc_name}: Pages {pages_str}[/dim]\n")
198 |             elif hasattr(result, 'page_numbers') and result.page_numbers:
199 |                 chat_log.write(f"[dim]Analyzed pages: {result.page_numbers}[/dim]\n")
200 | 
201 |             if hasattr(result, 'processing_time') and result.processing_time > 0:
202 |                 chat_log.write(f"[dim]Processing time: {result.processing_time:.2f}s[/dim]\n")
203 | 
204 |             cost = getattr(result, 'total_cost', 0.0) or 0.0
205 |             if cost < 0.01:
206 |                 chat_log.write(f"[dim]Cost: ${cost:.6f}[/dim]\n")
207 |             else:
208 |                 chat_log.write(f"[dim]Cost: ${cost:.4f}[/dim]\n")
209 | 
210 |             chat_log.write("\n")
211 | 
212 |             self.state_manager.add_conversation_message(
213 |                 ConversationMessage(role="user", content=query)
214 |             )
215 |             self.state_manager.add_conversation_message(
216 |                 ConversationMessage(role="assistant", content=result.answer,
217 |                                   cost=getattr(result, 'total_cost', 0.0) or 0.0)
218 |             )
219 | 
220 |             self.state_manager.limit_conversation_history()
221 |             self.state_manager.save_current_conversation()
222 | 
223 |             status_label = self.app.query_one("#status-label")
224 |             status_label.update(self.state_manager.get_status_text())
225 | 
226 |         except Exception as e:
227 |             chat_log.write(f"[red bold]●[/red bold] Error: {e}\n\n")
228 |         finally:
229 |             self.state_manager.set_processing(False)
230 | 
231 |     def delete_document_sync(self, document_id: str) -> bool:
232 |         if self.docpixie:
233 |             try:
234 |                 return self.docpixie.delete_document_sync(document_id)
235 |             except Exception:
236 |                 return False
237 |         return False
238 | 


--------------------------------------------------------------------------------
/docpixie/cli/legacy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | DocPixie CLI - Interactive document chat interface
  4 | """
  5 | 
  6 | import os
  7 | import sys
  8 | import asyncio
  9 | from pathlib import Path
 10 | from typing import List, Optional, Any, Dict
 11 | from datetime import datetime
 12 | 
 13 | from docpixie import DocPixie, ConversationMessage
 14 | from docpixie.core.config import DocPixieConfig
 15 | from docpixie.models.document import Document, QueryResult
 16 | from docpixie.models.agent import TaskStatus
 17 | 
 18 | 
 19 | class DocPixieCLI:
 20 |     """Command-line interface for DocPixie document chat"""
 21 | 
 22 |     def __init__(self):
 23 |         """Initialize the CLI application"""
 24 |         self.documents_folder = Path("./documents")
 25 |         self.docpixie: Optional[DocPixie] = None
 26 |         self.indexed_documents: List[Document] = []
 27 |         self.conversation_history: List[ConversationMessage] = []
 28 |         self.current_task_plan = None
 29 | 
 30 |     def initialize_docpixie(self) -> bool:
 31 |         """Initialize DocPixie with OpenRouter and in-memory storage"""
 32 |         try:
 33 |             api_key = os.getenv("OPENROUTER_API_KEY")
 34 |             if not api_key:
 35 |                 print("❌ Error: OPENROUTER_API_KEY environment variable not set")
 36 |                 print("Please set it with: export OPENROUTER_API_KEY='your-api-key'")
 37 |                 return False
 38 | 
 39 |             config = DocPixieConfig(
 40 |                 provider="openrouter",
 41 |                 model="openai/gpt-5-mini",
 42 |                 vision_model="openai/gpt-4.1",
 43 |                 storage_type="memory",
 44 |                 openrouter_api_key=api_key,
 45 |                 jpeg_quality=85,
 46 |                 max_pages_per_task=4
 47 |             )
 48 | 
 49 |             self.docpixie = DocPixie(config=config)
 50 |             print("✅ DocPixie initialized with OpenRouter (Gemini 2.5 Flash)")
 51 |             return True
 52 | 
 53 |         except Exception as e:
 54 |             print(f"❌ Failed to initialize DocPixie: {e}")
 55 |             return False
 56 | 
 57 |     def scan_documents(self) -> List[Path]:
 58 |         """Scan the documents folder for PDF files"""
 59 |         if not self.documents_folder.exists():
 60 |             self.documents_folder.mkdir(parents=True)
 61 |             print(f"📁 Created documents folder: {self.documents_folder.absolute()}")
 62 | 
 63 |         pdf_files = list(self.documents_folder.glob("*.pdf"))
 64 | 
 65 |         if not pdf_files:
 66 |             print(f"📭 No PDF files found in {self.documents_folder.absolute()}")
 67 |             print("Please add PDF files to the documents folder and restart the program.")
 68 |             return []
 69 | 
 70 |         print(f"\n📚 Found {len(pdf_files)} PDF file(s):")
 71 |         for i, pdf in enumerate(pdf_files, 1):
 72 |             print(f"  {i}. {pdf.name}")
 73 | 
 74 |         return pdf_files
 75 | 
 76 |     def index_documents(self, pdf_files: List[Path]) -> bool:
 77 |         """Index all PDF documents"""
 78 |         if not pdf_files:
 79 |             return False
 80 | 
 81 |         print(f"\n🔄 Starting document indexing...")
 82 | 
 83 |         for i, pdf_file in enumerate(pdf_files, 1):
 84 |             try:
 85 |                 print(f"\n📄 Processing ({i}/{len(pdf_files)}): {pdf_file.name}")
 86 | 
 87 |                 document = self.docpixie.add_document_sync(
 88 |                     file_path=str(pdf_file),
 89 |                     document_name=pdf_file.stem
 90 |                 )
 91 | 
 92 |                 self.indexed_documents.append(document)
 93 |                 print(f"   ✅ Indexed: {document.page_count} pages")
 94 | 
 95 |                 if document.summary:
 96 |                     print(f"   📝 Summary: {document.summary[:100]}...")
 97 | 
 98 |             except Exception as e:
 99 |                 print(f"   ❌ Failed to index {pdf_file.name}: {e}")
100 |                 continue
101 | 
102 |         successful = len(self.indexed_documents)
103 |         if successful > 0:
104 |             print(f"\n✅ Successfully indexed {successful}/{len(pdf_files)} document(s)")
105 |             return True
106 |         else:
107 |             print(f"\n❌ Failed to index any documents")
108 |             return False
109 | 
110 |     def display_welcome_message(self):
111 |         """Display welcome message and instructions"""
112 |         print("\n" + "="*60)
113 |         print("🧚 DocPixie Chat Interface")
114 |         print("="*60)
115 |         print("\nYou can now chat with your documents!")
116 |         print("Commands:")
117 |         print("  /new  - Start a new conversation")
118 |         print("  /exit - Exit the program")
119 |         print("  Ctrl+C - Force exit")
120 |         print("\n" + "-"*60)
121 | 
122 |     def format_answer(self, result: QueryResult) -> str:
123 |         """Format the query result for display"""
124 |         output = []
125 | 
126 |         # Add the answer
127 |         output.append(f"\n🤖 Assistant: {result.answer}")
128 | 
129 |         # Add metadata if available
130 |         if hasattr(result, 'get_pages_by_document'):
131 |             pages_by_doc = result.get_pages_by_document()
132 |             if pages_by_doc:
133 |                 output.append("\n📄 Analyzed documents:")
134 |                 for doc_name, page_nums in pages_by_doc.items():
135 |                     pages_str = ", ".join(str(p) for p in page_nums)
136 |                     output.append(f"  • {doc_name}: Pages {pages_str}")
137 |         elif result.page_numbers:
138 |             output.append(f"\n📄 Analyzed pages: {result.page_numbers}")
139 | 
140 |         if result.confidence > 0:
141 |             confidence_pct = int(result.confidence * 100)
142 |             output.append(f"💡 Confidence: {confidence_pct}%")
143 | 
144 |         if result.processing_time > 0:
145 |             output.append(f"⏱️ Processing time: {result.processing_time:.2f}s")
146 | 
147 |         return "\n".join(output)
148 | 
149 |     def display_task_plan(self, plan, action="Current"):
150 |         """Display the current task plan in a formatted way"""
151 |         print("\n" + "="*60)
152 |         print(f"📋 {action} Task Plan:")
153 |         print("="*60)
154 | 
155 |         for task in plan.tasks:
156 |             if task.status == TaskStatus.COMPLETED:
157 |                 icon = "✅"
158 |             elif task.status == TaskStatus.IN_PROGRESS:
159 |                 icon = "⏳"
160 |             else:
161 |                 icon = "⏸️ "
162 | 
163 |             doc_info = ""
164 |             if task.document:
165 |                 doc = next((d for d in self.indexed_documents if d.id == task.document), None)
166 |                 if doc:
167 |                     doc_info = f" [{doc.name}]"
168 | 
169 |             print(f"  {icon} {task.name}{doc_info}")
170 |             if task.description:
171 |                 print(f"      {task.description}")
172 | 
173 |         print("=" * 60)
174 | 
175 |     def display_task_update(self, event_type: str, data: Any):
176 |         """Display task plan updates as they happen"""
177 |         if event_type == 'plan_created':
178 |             self.current_task_plan = data
179 |             self.display_task_plan(data, "Initial")
180 | 
181 |         elif event_type == 'task_started':
182 |             task = data['task']
183 |             plan = data['plan']
184 |             self.current_task_plan = plan
185 | 
186 |             doc_info = ""
187 |             if task.document:
188 |                 doc = next((d for d in self.indexed_documents if d.id == task.document), None)
189 |                 if doc:
190 |                     doc_info = f" in {doc.name}"
191 | 
192 |             print(f"\n🔄 Starting task: {task.name}{doc_info}")
193 | 
194 |         elif event_type == 'pages_selected':
195 |             task = data['task']
196 |             page_numbers = data['page_numbers']
197 | 
198 |             if page_numbers:
199 |                 pages_str = ", ".join(str(p) for p in page_numbers)
200 |                 print(f"   📑 Selected pages: {pages_str}")
201 |             else:
202 |                 print(f"   📑 No relevant pages found")
203 | 
204 |         elif event_type == 'task_completed':
205 |             task = data['task']
206 |             result = data['result']
207 |             plan = data['plan']
208 |             self.current_task_plan = plan
209 | 
210 |             pages_analyzed = len(result.selected_pages) if hasattr(result, 'selected_pages') else 0
211 |             print(f"   ✅ Completed ({pages_analyzed} pages analyzed)")
212 | 
213 |         elif event_type == 'plan_updated':
214 |             self.current_task_plan = data
215 |             print("\n🔧 Task plan updated based on findings:")
216 |             self.display_task_plan(data, "Updated")
217 | 
218 |     async def task_update_callback(self, event_type: str, data: Any):
219 |         """Async callback for task updates"""
220 |         self.display_task_update(event_type, data)
221 | 
222 |     def chat_loop(self):
223 |         """Main chat interaction loop"""
224 |         self.display_welcome_message()
225 | 
226 |         while True:
227 |             try:
228 |                 user_input = input("\n👤 You: ").strip()
229 | 
230 |                 if not user_input:
231 |                     continue
232 | 
233 |                 if user_input.lower() == "/exit":
234 |                     print("\n👋 Goodbye!")
235 |                     break
236 | 
237 |                 if user_input.lower() == "/new":
238 |                     self.conversation_history = []
239 |                     print("\n🔄 Started new conversation")
240 |                     continue
241 | 
242 |                 print("\n⏳ Processing query...")
243 | 
244 |                 result = self.docpixie.query_sync(
245 |                     question=user_input,
246 |                     conversation_history=self.conversation_history,
247 |                     task_update_callback=self.task_update_callback
248 |                 )
249 | 
250 |                 print(self.format_answer(result))
251 | 
252 |                 self.conversation_history.append(
253 |                     ConversationMessage(role="user", content=user_input)
254 |                 )
255 |                 self.conversation_history.append(
256 |                     ConversationMessage(role="assistant", content=result.answer)
257 |                 )
258 | 
259 |                 if len(self.conversation_history) > 20:
260 |                     self.conversation_history = self.conversation_history[-20:]
261 | 
262 |             except KeyboardInterrupt:
263 |                 print("\n\n👋 Interrupted. Goodbye!")
264 |                 break
265 | 
266 |             except Exception as e:
267 |                 print(f"\n❌ Error: {e}")
268 |                 print("Please try again or use /new to start fresh.")
269 | 
270 |     def run(self):
271 |         """Main entry point for the CLI application"""
272 |         print("\n🧚 DocPixie CLI - Document Chat Interface")
273 |         print("="*60)
274 | 
275 |         if not self.initialize_docpixie():
276 |             return 1
277 | 
278 |         pdf_files = self.scan_documents()
279 |         if not pdf_files:
280 |             return 1
281 | 
282 |         print(f"\n❓ Index these {len(pdf_files)} document(s)? (y/n): ", end="")
283 |         response = input().strip().lower()
284 | 
285 |         if response != 'y':
286 |             print("📭 Indexing cancelled")
287 |             return 0
288 | 
289 |         if not self.index_documents(pdf_files):
290 |             return 1
291 | 
292 |         self.chat_loop()
293 | 
294 |         return 0
295 | 
296 | 
def main():
    """Run the DocPixie CLI and exit the process with its status code."""
    exit_code = DocPixieCLI().run()
    sys.exit(exit_code)
301 | 
302 | 
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
305 | 


--------------------------------------------------------------------------------
/docpixie/cli/conversation_storage.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Local conversation storage for DocPixie CLI
  3 | Stores conversations per project directory
  4 | """
  5 | 
  6 | import json
  7 | import uuid
  8 | import os
  9 | from pathlib import Path
 10 | from typing import List, Dict, Any, Optional
 11 | from datetime import datetime
 12 | from dataclasses import dataclass, asdict
 13 | 
 14 | from docpixie.models.agent import ConversationMessage
 15 | 
 16 | 
 17 | @dataclass
 18 | class ConversationMetadata:
 19 |     """Metadata for a conversation"""
 20 |     id: str
 21 |     name: str
 22 |     working_directory: str
 23 |     created_at: str
 24 |     updated_at: str
 25 |     message_count: int
 26 |     indexed_documents: List[str]
 27 |     total_cost: float = 0.0
 28 | 
 29 | 
 30 | class ConversationStorage:
 31 |     """Manages local conversation storage in ./.docpixie/conversations/"""
 32 |     
 33 |     def __init__(self):
 34 |         """Initialize conversation storage for current directory"""
 35 |         self.base_path = Path("./.docpixie")
 36 |         self.conversations_dir = self.base_path / "conversations"
 37 |         self.metadata_file = self.conversations_dir / "metadata.json"
 38 |         
 39 |         self.conversations_dir.mkdir(parents=True, exist_ok=True)
 40 |         
 41 |         self.working_directory = str(Path.cwd().resolve())
 42 |         
 43 |         self.current_conversation_id: Optional[str] = None
 44 |         
 45 |         self._load_metadata()
 46 |     
 47 |     def _load_metadata(self) -> Dict[str, ConversationMetadata]:
 48 |         """Load conversation metadata from file"""
 49 |         if not self.metadata_file.exists():
 50 |             return {}
 51 |         
 52 |         try:
 53 |             with open(self.metadata_file, 'r') as f:
 54 |                 data = json.load(f)
 55 |             
 56 |             metadata = {}
 57 |             for conv_id, conv_data in data.items():
 58 |                 if 'total_cost' not in conv_data:
 59 |                     conv_data['total_cost'] = 0.0
 60 |                 metadata[conv_id] = ConversationMetadata(**conv_data)
 61 |             
 62 |             return metadata
 63 |         except Exception as e:
 64 |             print(f"Warning: Failed to load conversation metadata: {e}")
 65 |             return {}
 66 |     
 67 |     def _save_metadata(self, metadata: Dict[str, ConversationMetadata]):
 68 |         """Save conversation metadata to file"""
 69 |         try:
 70 |             data = {}
 71 |             for conv_id, conv_meta in metadata.items():
 72 |                 data[conv_id] = asdict(conv_meta)
 73 |             
 74 |             with open(self.metadata_file, 'w') as f:
 75 |                 json.dump(data, f, indent=2)
 76 |         except Exception as e:
 77 |             print(f"Error saving conversation metadata: {e}")
 78 |     
 79 |     def _conversation_file_path(self, conversation_id: str) -> Path:
 80 |         """Get path for conversation file"""
 81 |         return self.conversations_dir / f"{conversation_id}.json"
 82 |     
 83 |     def _generate_conversation_name(self, messages: List[ConversationMessage]) -> str:
 84 |         """Generate a conversation name from the first user message"""
 85 |         if not messages:
 86 |             return f"Chat {datetime.now().strftime('%Y-%m-%d %H:%M')}"
 87 |         
 88 |         first_user_message = None
 89 |         for msg in messages:
 90 |             if msg.role == "user":
 91 |                 first_user_message = msg
 92 |                 break
 93 |         
 94 |         if first_user_message:
 95 |             name = first_user_message.content.strip()[:50]
 96 |             if len(first_user_message.content) > 50:
 97 |                 name += "..."
 98 |             return name
 99 |         else:
100 |             return f"Chat {datetime.now().strftime('%Y-%m-%d %H:%M')}"
101 |     
102 |     def create_new_conversation(self, indexed_documents: List[str] = None) -> str:
103 |         """Create a new conversation and return its ID"""
104 |         conversation_id = str(uuid.uuid4())
105 |         now = datetime.now().isoformat()
106 |         
107 |         metadata = ConversationMetadata(
108 |             id=conversation_id,
109 |             name="New Chat",
110 |             working_directory=self.working_directory,
111 |             created_at=now,
112 |             updated_at=now,
113 |             message_count=0,
114 |             indexed_documents=indexed_documents or [],
115 |             total_cost=0.0
116 |         )
117 |         
118 |         conversation_data = {
119 |             "id": conversation_id,
120 |             "metadata": asdict(metadata),
121 |             "messages": []
122 |         }
123 |         
124 |         conversation_file = self._conversation_file_path(conversation_id)
125 |         with open(conversation_file, 'w') as f:
126 |             json.dump(conversation_data, f, indent=2)
127 |         
128 |         all_metadata = self._load_metadata()
129 |         all_metadata[conversation_id] = metadata
130 |         self._save_metadata(all_metadata)
131 |         
132 |         self.current_conversation_id = conversation_id
133 |         return conversation_id
134 |     
135 |     def save_conversation(self, conversation_id: str, messages: List[ConversationMessage], 
136 |                          indexed_documents: List[str] = None):
137 |         """Save conversation messages"""
138 |         try:
139 |             now = datetime.now().isoformat()
140 |             
141 |             messages_data = []
142 |             total_cost = 0.0
143 |             for msg in messages:
144 |                 msg_dict = {
145 |                     "role": msg.role,
146 |                     "content": msg.content,
147 |                     "timestamp": msg.timestamp.isoformat()
148 |                 }
149 |                 msg_cost = getattr(msg, 'cost', 0.0) or 0.0
150 |                 msg_dict["cost"] = msg_cost
151 |                 total_cost += msg_cost
152 |                 messages_data.append(msg_dict)
153 |             
154 |             all_metadata = self._load_metadata()
155 |             if conversation_id in all_metadata:
156 |                 conv_metadata = all_metadata[conversation_id]
157 |                 conv_metadata.updated_at = now
158 |                 conv_metadata.message_count = len(messages)
159 |                 conv_metadata.total_cost = total_cost
160 |                 if indexed_documents is not None:
161 |                     conv_metadata.indexed_documents = indexed_documents
162 |                 
163 |                 if conv_metadata.name == "New Chat" and messages:
164 |                     conv_metadata.name = self._generate_conversation_name(messages)
165 |             else:
166 |                 conv_metadata = ConversationMetadata(
167 |                     id=conversation_id,
168 |                     name=self._generate_conversation_name(messages),
169 |                     working_directory=self.working_directory,
170 |                     created_at=now,
171 |                     updated_at=now,
172 |                     message_count=len(messages),
173 |                     indexed_documents=indexed_documents or [],
174 |                     total_cost=total_cost
175 |                 )
176 |                 all_metadata[conversation_id] = conv_metadata
177 |             
178 |             conversation_data = {
179 |                 "id": conversation_id,
180 |                 "metadata": asdict(conv_metadata),
181 |                 "messages": messages_data
182 |             }
183 |             
184 |             conversation_file = self._conversation_file_path(conversation_id)
185 |             with open(conversation_file, 'w') as f:
186 |                 json.dump(conversation_data, f, indent=2)
187 |             
188 |             self._save_metadata(all_metadata)
189 |             
190 |         except Exception as e:
191 |             print(f"Error saving conversation: {e}")
192 |     
193 |     def load_conversation(self, conversation_id: str) -> Optional[tuple[ConversationMetadata, List[ConversationMessage]]]:
194 |         """Load conversation by ID"""
195 |         try:
196 |             conversation_file = self._conversation_file_path(conversation_id)
197 |             if not conversation_file.exists():
198 |                 return None
199 |             
200 |             with open(conversation_file, 'r') as f:
201 |                 data = json.load(f)
202 |             
203 |             metadata = ConversationMetadata(**data["metadata"])
204 |             
205 |             messages = []
206 |             for msg_data in data["messages"]:
207 |                 message = ConversationMessage(
208 |                     role=msg_data["role"],
209 |                     content=msg_data["content"],
210 |                     timestamp=datetime.fromisoformat(msg_data["timestamp"]),
211 |                     cost=msg_data.get("cost", 0.0)
212 |                 )
213 |                 messages.append(message)
214 |             
215 |             self.current_conversation_id = conversation_id
216 |             return metadata, messages
217 |             
218 |         except Exception as e:
219 |             print(f"Error loading conversation: {e}")
220 |             return None
221 |     
222 |     def list_local_conversations(self) -> List[ConversationMetadata]:
223 |         """List conversations from current working directory only"""
224 |         all_metadata = self._load_metadata()
225 |         
226 |         local_conversations = []
227 |         for conv_id, metadata in all_metadata.items():
228 |             if metadata.working_directory == self.working_directory:
229 |                 local_conversations.append(metadata)
230 |         
231 |         local_conversations.sort(key=lambda x: x.updated_at, reverse=True)
232 |         return local_conversations
233 |     
234 |     def delete_conversation(self, conversation_id: str) -> bool:
235 |         """Delete a conversation"""
236 |         try:
237 |             conversation_file = self._conversation_file_path(conversation_id)
238 |             if conversation_file.exists():
239 |                 conversation_file.unlink()
240 |             
241 |             all_metadata = self._load_metadata()
242 |             if conversation_id in all_metadata:
243 |                 del all_metadata[conversation_id]
244 |                 self._save_metadata(all_metadata)
245 |             
246 |             if self.current_conversation_id == conversation_id:
247 |                 self.current_conversation_id = None
248 |             
249 |             return True
250 |         except Exception as e:
251 |             print(f"Error deleting conversation: {e}")
252 |             return False
253 |     
254 |     def rename_conversation(self, conversation_id: str, new_name: str) -> bool:
255 |         """Rename a conversation"""
256 |         try:
257 |             all_metadata = self._load_metadata()
258 |             if conversation_id not in all_metadata:
259 |                 return False
260 |             
261 |             all_metadata[conversation_id].name = new_name
262 |             all_metadata[conversation_id].updated_at = datetime.now().isoformat()
263 |             
264 |             conversation_file = self._conversation_file_path(conversation_id)
265 |             if conversation_file.exists():
266 |                 with open(conversation_file, 'r') as f:
267 |                     data = json.load(f)
268 |                 
269 |                 data["metadata"]["name"] = new_name
270 |                 data["metadata"]["updated_at"] = all_metadata[conversation_id].updated_at
271 |                 
272 |                 with open(conversation_file, 'w') as f:
273 |                     json.dump(data, f, indent=2)
274 |             
275 |             self._save_metadata(all_metadata)
276 |             return True
277 |             
278 |         except Exception as e:
279 |             print(f"Error renaming conversation: {e}")
280 |             return False
281 |     
282 |     def get_last_conversation(self) -> Optional[str]:
283 |         """Get the most recently updated conversation ID from current directory"""
284 |         conversations = self.list_local_conversations()
285 |         if conversations:
286 |             return conversations[0].id
287 |         return None


--------------------------------------------------------------------------------