├── .env.example ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build.bat ├── mcp-image-recognition.code-workspace ├── requirements-dev.txt ├── requirements.txt ├── run.bat ├── setup.py ├── src └── image_recognition_server │ ├── __init__.py │ ├── server.py │ ├── utils │ ├── __init__.py │ ├── image.py │ └── ocr.py │ └── vision │ ├── __init__.py │ ├── anthropic.py │ └── openai.py └── tests ├── test_ocr.py └── test_server.py /.env.example: -------------------------------------------------------------------------------- 1 | # Vision Provider (anthropic or openai) 2 | VISION_PROVIDER=openai 3 | 4 | # Fallback Provider (optional, if primary provider fails) 5 | FALLBACK_PROVIDER= 6 | 7 | # Logging Level (DEBUG, INFO, WARNING, ERROR) 8 | LOG_LEVEL=ERROR 9 | 10 | # Anthropic Settings 11 | # ANTHROPIC_API_KEY=aaaaaaa 12 | # ANTHROPIC_MODEL=claude-3.5-lates 13 | 14 | # OpenAI Settings 15 | # OPENAI_TIMEOUT=60 16 | 17 | OPENAI_API_KEY=gggggggggg 18 | OPENAI_BASE_URL= 19 | OPENAI_MODEL=gpt-4o-mini 20 | 21 | # Optional: Set a custom base URL/Model for the OpenAI API 22 | # - openrouter 23 | # OPENAI_API_KEY=ooooooo 24 | # OPENAI_BASE_URL=https://openrouter.ai/api/v1 25 | # OPENAI_MODEL=anthropic/claude-3.5-sonnet:beta 26 | # - grok 27 | # OPENAI_API_KEY=xxxxxxx 28 | # OPENAI_BASE_URL=https://api.x.ai/v1 29 | # OPENAI_MODEL=grok-2-vision-latest 30 | 31 | # Tesseract OCR Settings 32 | # Set to 'true' to enable Tesseract OCR text extraction 33 | # ENABLE_OCR=false 34 | # Path to Tesseract executable 35 | # TESSERACT_CMD= 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 
| parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # escape=` 2 | 3 | # Use Windows Server Core as base image 4 | FROM mcr.microsoft.com/windows/servercore:ltsc2019 5 | 6 | # Set shell to PowerShell 7 | SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] 8 | 9 | # Install Python 3.10 and Tesseract OCR 10 | RUN Invoke-WebRequest -Uri 'https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe' -OutFile 'python-3.10.0-amd64.exe'; ` 11 | Start-Process python-3.10.0-amd64.exe -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1' -Wait; ` 12 | Remove-Item python-3.10.0-amd64.exe; ` 13 | Invoke-WebRequest -Uri 'https://github.com/UB-Mannheim/tesseract/releases/download/v5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe' -OutFile 'tesseract-installer.exe'; ` 14 | Start-Process tesseract-installer.exe -ArgumentList '/S /D=C:\Program Files\Tesseract-OCR' -Wait; ` 15 | Remove-Item tesseract-installer.exe 16 | 17 | # Set working directory 18 | WORKDIR /app 19 | 20 | # Copy project files 21 | COPY requirements.txt . 
22 | COPY src/ ./src/ 23 | COPY .env.example ./.env 24 | 25 | # Install dependencies 26 | RUN pip install --no-cache-dir -r requirements.txt 27 | 28 | # Set environment variables 29 | ENV PYTHONPATH=/app/src 30 | ENV TESSERACT_CMD="C:\Program Files\Tesseract-OCR\tesseract.exe" 31 | 32 | # Run the server 33 | CMD ["python", "-m", "src.image_recognition_server.server"] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 mario-andreschak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MCP Image Recognition Server 2 | 3 | An MCP server that provides image recognition capabilities using Anthropic and OpenAI vision APIs. Version 0.1.2. 4 | 5 | ## Features 6 | 7 | - Image description using Anthropic Claude Vision or OpenAI GPT-4 Vision 8 | - Support for multiple image formats (JPEG, PNG, GIF, WebP) 9 | - Configurable primary and fallback providers 10 | - Base64 and file-based image input support 11 | - Optional text extraction using Tesseract OCR 12 | 13 | ## Requirements 14 | 15 | - Python 3.8 or higher 16 | - Tesseract OCR (optional) - Required for text extraction feature 17 | - Windows: Download and install from [UB-Mannheim/tesseract](https://github.com/UB-Mannheim/tesseract/wiki) 18 | - Linux: `sudo apt-get install tesseract-ocr` 19 | - macOS: `brew install tesseract` 20 | 21 | ## Installation 22 | 23 | 1. Clone the repository: 24 | ```bash 25 | git clone https://github.com/mario-andreschak/mcp-image-recognition.git 26 | cd mcp-image-recognition 27 | ``` 28 | 29 | 2. Create and configure your environment file: 30 | ```bash 31 | cp .env.example .env 32 | # Edit .env with your API keys and preferences 33 | ``` 34 | 35 | 3. Build the project: 36 | ```bash 37 | build.bat 38 | ``` 39 | 40 | ## Usage 41 | 42 | ### Running the Server 43 | Spawn the server using python: 44 | ```bash 45 | python -m image_recognition_server.server 46 | ``` 47 | 48 | Start the server using batch instead: 49 | ```bash 50 | run.bat server 51 | ``` 52 | 53 | Start the server in development mode with the MCP Inspector: 54 | ```bash 55 | run.bat debug 56 | ``` 57 | 58 | ### Available Tools 59 | 60 | 1. `describe_image` 61 | - Input: Base64-encoded image data and MIME type 62 | - Output: Detailed description of the image 63 | 64 | 2. 
`describe_image_from_file` 65 | - Input: Path to an image file 66 | - Output: Detailed description of the image 67 | 68 | ### Environment Configuration 69 | 70 | - `ANTHROPIC_API_KEY`: Your Anthropic API key. 71 | - `OPENAI_API_KEY`: Your OpenAI API key. 72 | - `VISION_PROVIDER`: Primary vision provider (`anthropic` or `openai`). 73 | - `FALLBACK_PROVIDER`: Optional fallback provider. 74 | - `LOG_LEVEL`: Logging level (DEBUG, INFO, WARNING, ERROR). 75 | - `ENABLE_OCR`: Enable Tesseract OCR text extraction (`true` or `false`). 76 | - `TESSERACT_CMD`: Optional custom path to Tesseract executable. 77 | - `OPENAI_MODEL`: OpenAI Model (default: `gpt-4o-mini`). Can use OpenRouter format for other models (e.g., `anthropic/claude-3.5-sonnet:beta`). 78 | - `OPENAI_BASE_URL`: Optional custom base URL for the OpenAI API. Set to `https://openrouter.ai/api/v1` for OpenRouter. 79 | - `OPENAI_TIMEOUT`: Optional custom timeout (in seconds) for the OpenAI API. 80 | 81 | ### Using OpenRouter 82 | 83 | OpenRouter allows you to access various models using the OpenAI API format. To use OpenRouter, follow these steps: 84 | 85 | 1. Obtain an OpenAI API key from OpenRouter. 86 | 2. Set `OPENAI_API_KEY` in your `.env` file to your OpenRouter API key. 87 | 3. Set `OPENAI_BASE_URL` to `https://openrouter.ai/api/v1`. 88 | 4. Set `OPENAI_MODEL` to the desired model using the OpenRouter format (e.g., `anthropic/claude-3.5-sonnet:beta`). 89 | 5. Set `VISION_PROVIDER` to `openai`. 90 | 91 | ### Default Models 92 | 93 | - Anthropic: `claude-3.5-sonnet-beta` 94 | - OpenAI: `gpt-4o-mini` 95 | - OpenRouter: Use the `anthropic/claude-3.5-sonnet:beta` format in `OPENAI_MODEL`. 
96 | 97 | ## Development 98 | 99 | ### Running Tests 100 | 101 | Run all tests: 102 | ```bash 103 | run.bat test 104 | ``` 105 | 106 | Run specific test suite: 107 | ```bash 108 | run.bat test server 109 | run.bat test anthropic 110 | run.bat test openai 111 | ``` 112 | 113 | ### Docker Support 114 | 115 | Build the Docker image: 116 | ```bash 117 | docker build -t mcp-image-recognition . 118 | ``` 119 | 120 | Run the container: 121 | ```bash 122 | docker run -it --env-file .env mcp-image-recognition 123 | ``` 124 | 125 | ## License 126 | 127 | MIT License - see LICENSE file for details. 128 | 129 | ## Release History 130 | 131 | - **0.1.2** (2025-02-20): Improved OCR error handling and added comprehensive test coverage for OCR functionality 132 | - **0.1.1** (2025-02-19): Added Tesseract OCR support for text extraction from images (optional feature) 133 | - **0.1.0** (2025-02-19): Initial release with Anthropic and OpenAI vision support 134 | -------------------------------------------------------------------------------- /build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Build script for MCP Image Recognition Server 3 | 4 | REM Install dependencies 5 | pip install -r requirements.txt 6 | pip install -r requirements-dev.txt 7 | pip install -e . 
8 | 9 | REM Run code formatting 10 | black src/ 11 | isort src/ 12 | 13 | REM Run linting 14 | ruff check src/ 15 | mypy src/ 16 | 17 | REM Build package 18 | python setup.py build 19 | 20 | 21 | 22 | 23 | 24 | 25 | REM Run code formatting 26 | @REM black tests/ 27 | @REM isort tests/ 28 | 29 | REM Run linting 30 | @REM ruff check tests/ 31 | @REM mypy tests/ 32 | 33 | REM Run tests 34 | @REM pytest tests/ -v --cov=src 35 | -------------------------------------------------------------------------------- /mcp-image-recognition.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": {} 8 | } -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pytest>=7.0.0 3 | pytest-asyncio>=0.23.0 4 | pytest-cov>=4.1.0 5 | black>=23.0.0 6 | isort>=5.12.0 7 | mypy>=1.0.0 8 | ruff>=0.1.0 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mcp>=1.2.0 2 | anthropic>=0.8.0 3 | openai>=1.0.0 4 | python-dotenv>=1.0.0 5 | Pillow>=10.0.0 6 | numpy>=1.26.0 7 | pandas>=2.1.0 8 | pytesseract>=0.3.13 9 | -------------------------------------------------------------------------------- /run.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | IF "%1"=="test" ( 3 | IF "%2"=="server" ( 4 | cls 5 | python -m pytest tests/test_server.py -v 6 | ) ELSE IF "%2"=="anthropic" ( 7 | cls 8 | python -m pytest tests/test_anthropic.py -v 9 | ) ELSE IF "%2"=="openai" ( 10 | cls 11 | python -m pytest tests/test_openai.py -v 12 | ) ELSE ( 13 | cls 14 | python -m pytest tests/ -v 15 | ) 16 | ) ELSE IF "%1"=="server" ( 17 | cls 18 | cd ./build/lib 19 | set 
PYTHONIOENCODING=utf-8 20 | python -m image_recognition_server.server 21 | cd ../.. 22 | ) ELSE IF "%1"=="debug" ( 23 | cls 24 | cd ./build/lib 25 | npx @modelcontextprotocol/inspector python -m image_recognition_server.server 26 | cd ../.. 27 | ) ELSE IF "%1"=="full" ( 28 | build.bat 29 | run.bat debug 30 | ) ELSE ( 31 | echo Invalid command. 32 | echo Usage: 33 | echo run.bat test [server ^| anthropic ^| openai] 34 | echo run.bat server 35 | echo run.bat debug 36 | echo run.bat full 37 | ) 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="mcp-image-recognition", 5 | version="0.1.1", 6 | description="MCP server for image recognition using Anthropic and OpenAI vision APIs", 7 | author="Mario", 8 | packages=find_packages(where="src"), 9 | package_dir={"": "src"}, 10 | python_requires=">=3.10", 11 | install_requires=[ 12 | "mcp>=1.2.0", 13 | "anthropic>=0.8.0", 14 | "openai>=1.0.0", 15 | "python-dotenv>=1.0.0", 16 | "Pillow>=10.0.0", 17 | "numpy>=1.26.0", 18 | "pandas>=2.1.0", 19 | "pytesseract>=0.3.13", 20 | ], 21 | extras_require={ 22 | "dev": [ 23 | "pytest>=7.0.0", 24 | "pytest-asyncio>=0.23.0", 25 | "pytest-cov>=4.1.0", 26 | "black>=23.0.0", 27 | "isort>=5.12.0", 28 | "mypy>=1.0.0", 29 | "ruff>=0.1.0", 30 | ] 31 | }, 32 | ) 33 | -------------------------------------------------------------------------------- /src/image_recognition_server/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP server for image recognition using Anthropic and OpenAI vision APIs.""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /src/image_recognition_server/server.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 
import base64
import io
import logging
import os
from typing import Union

from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from PIL import Image

from .utils.image import image_to_base64, validate_base64_image
from .utils.ocr import OCRError, extract_text_from_image
from .vision.anthropic import AnthropicVision
from .vision.openai import OpenAIVision

# Load environment variables before any os.getenv() calls below.
load_dotenv()

# Configure encoding, defaulting to UTF-8
DEFAULT_ENCODING = "utf-8"
ENCODING = os.getenv("MCP_OUTPUT_ENCODING", DEFAULT_ENCODING)

# Log to a file beside this module; stdout must stay clean for the MCP
# stdio transport.
log_file_path = os.path.join(os.path.dirname(__file__), "mcp_server.log")
logging.basicConfig(
    level=os.getenv("LOG_LEVEL", "INFO"),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    filename=log_file_path,
    filemode="a",  # Append to log file
)
logger = logging.getLogger(__name__)

logger.info(f"Using encoding: {ENCODING}")


def sanitize_output(text: str) -> str:
    """Sanitize output string to replace problematic characters.

    Args:
        text: Text to sanitize. ``None`` is tolerated and mapped to "".

    Returns:
        str: Text round-tripped through ENCODING with unencodable
        characters replaced; the original text if re-encoding fails.
    """
    if text is None:
        return ""  # Return empty string for None
    try:
        return text.encode(ENCODING, "replace").decode(ENCODING)
    except Exception as e:
        logger.error(f"Error during sanitization: {str(e)}", exc_info=True)
        return text  # Return original text if sanitization fails


# Create MCP server
mcp = FastMCP(
    "mcp-image-recognition",
    description="MCP server for image recognition using Anthropic and OpenAI vision APIs",
)


# Initialize vision clients
def get_vision_client() -> Union[AnthropicVision, OpenAIVision]:
    """Get the configured vision client based on environment settings.

    Returns:
        The client for VISION_PROVIDER, or the FALLBACK_PROVIDER client
        when the primary client fails to initialize.

    Raises:
        ValueError: If VISION_PROVIDER is not ``anthropic`` or ``openai``.
        Exception: Re-raised constructor failure when no usable fallback
            is configured.
    """
    provider = os.getenv("VISION_PROVIDER", "anthropic").lower()

    try:
        if provider == "anthropic":
            return AnthropicVision()
        elif provider == "openai":
            return OpenAIVision()
        else:
            raise ValueError(f"Invalid vision provider: {provider}")
    except Exception as e:
        # Try fallback provider if configured and different from the primary.
        fallback = os.getenv("FALLBACK_PROVIDER")
        if fallback and fallback.lower() != provider:
            logger.warning(
                f"Primary provider failed: {str(e)}. Trying fallback: {fallback}"
            )
            if fallback.lower() == "anthropic":
                return AnthropicVision()
            elif fallback.lower() == "openai":
                return OpenAIVision()
        raise


async def process_image_with_ocr(
    image_data: str, prompt: str, mime_type: str = "image/png"
) -> str:
    """Process image with both vision AI and OCR.

    Args:
        image_data: Base64 encoded image data
        prompt: Prompt for vision AI
        mime_type: MIME type of the image. Forwarded to the vision API so
            non-PNG images are not mislabeled (previously the detected
            type was silently dropped).

    Returns:
        str: Combined description from vision AI and OCR

    Raises:
        ValueError: If the vision API returns an empty/default response,
            or OCR fails while ENABLE_OCR is true.
    """
    # Get vision AI description
    client = get_vision_client()

    # Handle both sync (Anthropic) and async (OpenAI) clients
    if isinstance(client, OpenAIVision):
        description = await client.describe_image(image_data, prompt, mime_type)
    else:
        description = client.describe_image(image_data, prompt, mime_type)

    # Check for empty or default response
    if not description or description == "No description available.":
        raise ValueError("Vision API returned empty or default response")

    # Handle OCR if enabled
    ocr_enabled = os.getenv("ENABLE_OCR", "false").lower() == "true"
    if ocr_enabled:
        try:
            # Convert base64 to PIL Image
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes))

            # Extract text with OCR required flag
            if ocr_text := extract_text_from_image(image, ocr_required=True):
                description += (
                    f"\n\nAdditionally, this is the output of tesseract-ocr: {ocr_text}"
                )
        except OCRError as e:
            # Propagate OCR errors when OCR is enabled, keeping the cause chained.
            logger.error(f"OCR processing failed: {str(e)}")
            raise ValueError(f"OCR Error: {str(e)}") from e
        except Exception as e:
            logger.error(f"Unexpected error during OCR: {str(e)}")
            raise

    return sanitize_output(description)


@mcp.tool()
async def describe_image(
    image: str,
    prompt: str = "Please describe this image in detail.",
    mime_type: str = "image/png",
) -> str:
    """Describe the contents of an image using vision AI.

    Args:
        image: Base64 encoded image data
        prompt: Optional prompt to use for the description.
        mime_type: Optional MIME type of the image. Defaults to PNG for
            backward compatibility with existing callers.

    Returns:
        str: Detailed description of the image

    Raises:
        ValueError: If the image data is not valid base64 or processing
            returns an empty result.
    """
    try:
        logger.info(f"Processing image description request with prompt: {prompt}")
        logger.debug(f"Image data length: {len(image)}")

        # Validate image data
        if not validate_base64_image(image):
            raise ValueError("Invalid base64 image data")

        result = await process_image_with_ocr(image, prompt, mime_type)
        if not result:
            raise ValueError("Received empty response from processing")

        logger.info("Successfully processed image")
        return sanitize_output(result)
    except ValueError as e:
        logger.error(f"Input error: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Error describing image: {str(e)}", exc_info=True)
        raise


@mcp.tool()
async def describe_image_from_file(
    filepath: str, prompt: str = "Please describe this image in detail."
) -> str:
    """Describe the contents of an image file using vision AI.

    Args:
        filepath: Path to the image file
        prompt: Optional prompt to use for the description.

    Returns:
        str: Detailed description of the image

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file is not a valid image or processing fails.
    """
    try:
        logger.info(f"Processing image file: {filepath}")

        # Convert image to base64 and detect its real MIME type.
        image_data, mime_type = image_to_base64(filepath)
        logger.info(f"Successfully converted image to base64. MIME type: {mime_type}")
        logger.debug(f"Base64 data length: {len(image_data)}")

        # Use describe_image tool, forwarding the detected MIME type so
        # JPEG/GIF/WebP files are not labeled image/png.
        result = await describe_image(
            image=image_data, prompt=prompt, mime_type=mime_type
        )

        if not result:
            raise ValueError("Received empty response from processing")

        return sanitize_output(result)
    except FileNotFoundError:
        logger.error(f"Image file not found: {filepath}")
        raise
    except ValueError as e:
        logger.error(f"Input error: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Error processing image file: {str(e)}", exc_info=True)
        raise


if __name__ == "__main__":
    mcp.run()

# -----------------------------------------------------------------------------
# /src/image_recognition_server/utils/__init__.py:
# -----------------------------------------------------------------------------
"""Utility functions for image handling and processing."""

from .image import image_to_base64, validate_base64_image

__all__ = ["image_to_base64", "validate_base64_image"]
import base64
import io
import logging
from pathlib import Path
from typing import Tuple

from PIL import Image, UnidentifiedImageError

logger = logging.getLogger(__name__)


def image_to_base64(image_path: str) -> Tuple[str, str]:
    """Convert an image file to base64 string and detect its MIME type.

    Args:
        image_path: Path to the image file

    Returns:
        Tuple of (base64_string, mime_type)

    Raises:
        FileNotFoundError: If image file doesn't exist
        ValueError: If file is not a valid image
    """
    path = Path(image_path)
    if not path.exists():
        logger.error(f"Image file not found: {image_path}")
        raise FileNotFoundError(f"Image file not found: {image_path}")

    try:
        # Open the file once to validate it and determine its format.
        with Image.open(path) as img:
            # Map Pillow format names to MIME types; anything unknown is
            # treated as opaque binary.
            format_to_mime = {
                "JPEG": "image/jpeg",
                "PNG": "image/png",
                "GIF": "image/gif",
                "WEBP": "image/webp",
            }
            mime_type = format_to_mime.get(img.format, "application/octet-stream")
            logger.info(
                f"Processing image: {image_path}, format: {img.format}, size: {img.size}"
            )

        # Encode the raw bytes so the payload is the original file, not a
        # Pillow re-encode.
        with path.open("rb") as f:
            base64_data = base64.b64encode(f.read()).decode("utf-8")
            logger.debug(f"Base64 data length: {len(base64_data)}")

        return base64_data, mime_type

    except UnidentifiedImageError as e:
        logger.error(f"Invalid image format: {str(e)}")
        raise ValueError(f"Invalid image format: {str(e)}") from e
    except OSError as e:
        logger.error(f"Failed to read image file: {str(e)}")
        raise ValueError(f"Failed to read image file: {str(e)}") from e
    except Exception as e:
        logger.error(f"Unexpected error processing image: {str(e)}", exc_info=True)
        raise ValueError(f"Failed to process image: {str(e)}") from e


def validate_base64_image(base64_string: str) -> bool:
    """Validate if a string is a valid base64-encoded image.

    Args:
        base64_string: The base64 string to validate

    Returns:
        bool: True if valid, False otherwise
    """
    try:
        # Try to decode base64
        image_data = base64.b64decode(base64_string)

        # Try to open as image; Image.open raises if the bytes are not a
        # recognizable image format.
        with Image.open(io.BytesIO(image_data)) as img:
            logger.debug(
                f"Validated base64 image, format: {img.format}, size: {img.size}"
            )
            return True

    except Exception as e:
        logger.warning(f"Invalid base64 image: {str(e)}")
        return False
import logging
import os
from typing import Optional

import pytesseract  # type: ignore
from PIL import Image

logger = logging.getLogger(__name__)


class OCRError(Exception):
    """Exception raised for OCR-related errors."""

    pass


def extract_text_from_image(
    image: Image.Image, ocr_required: bool = False
) -> Optional[str]:
    """Extract text from an image using Tesseract OCR.

    Args:
        image: PIL Image object to process
        ocr_required: If True, raise error when OCR fails. If False, return None.

    Returns:
        Optional[str]: Extracted text if successful, None if Tesseract is not available
        and ocr_required is False

    Raises:
        OCRError: If OCR fails and ocr_required is True
    """
    try:
        # Check if custom tesseract path is set in environment and not empty
        if tesseract_cmd := os.getenv("TESSERACT_CMD"):
            if tesseract_cmd.strip():  # Only set if path is non-empty
                pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

        # Extract text from image
        text = pytesseract.image_to_string(image)

        # Clean and validate result
        text = text.strip()
        if text:
            logger.info("Successfully extracted text from image using Tesseract")
            logger.debug(f"Extracted text length: {len(text)}")
            return text
        else:
            logger.info("No text found in image")
            return None

    except Exception as e:
        error_msg = f"Failed to extract text using Tesseract: {str(e)}"
        if "not installed" in str(e) or "not in your PATH" in str(e):
            error_msg = (
                "Tesseract OCR is not installed or not in PATH. "
                "Please install Tesseract and ensure it's in your system PATH, "
                "or set TESSERACT_CMD environment variable to the executable path."
            )

        logger.warning(error_msg)
        if ocr_required:
            # Chain the original exception so the root cause is preserved
            # for callers and log handlers.
            raise OCRError(error_msg) from e
        return None

# -----------------------------------------------------------------------------
# /src/image_recognition_server/vision/__init__.py:
# -----------------------------------------------------------------------------
"""Vision API integrations for image recognition."""

from .anthropic import AnthropicVision
from .openai import OpenAIVision

__all__ = ["AnthropicVision", "OpenAIVision"]
import logging
import os
from typing import Optional

from anthropic import Anthropic, APIConnectionError, APIError, APITimeoutError
from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam

logger = logging.getLogger(__name__)


class AnthropicVision:
    def __init__(self, api_key: Optional[str] = None):
        """Initialize Anthropic Vision client.

        Args:
            api_key: Optional API key. If not provided, will try to get from environment.

        Raises:
            ValueError: If no API key is available.
        """
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Anthropic API key not provided and not found in environment"
            )

        self.client = Anthropic(api_key=self.api_key)

    def describe_image(
        self,
        image: str,
        prompt: str = "Please describe this image in detail.",
        mime_type: str = "image/png",
    ) -> str:
        """Describe an image using Anthropic's Claude Vision.

        Args:
            image: string containing the base64 encoded image.
            prompt: Optional string containing the prompt.
            mime_type: MIME type of the image (must match the encoded data,
                Anthropic validates it server-side).

        Returns:
            str: Description of the image

        Raises:
            Exception: If API call fails
        """
        try:
            image_block = ImageBlockParam(
                type="image",
                source={"type": "base64", "media_type": mime_type, "data": image},
            )

            text_block = TextBlockParam(type="text", text=prompt)

            messages: list[MessageParam] = [
                {
                    "role": "user",
                    "content": [image_block, text_block],
                }
            ]

            # Get model from environment. The previous default
            # "claude-3.5-sonnet-beta" is not a valid Anthropic model ID;
            # use the documented "-latest" alias instead.
            model = os.getenv("ANTHROPIC_MODEL", "claude-3-5-sonnet-latest")

            # Make API call
            response = self.client.messages.create(
                model=model, max_tokens=1024, messages=messages
            )

            # Extract text from content blocks (skip tool-use or other
            # non-text block types).
            description = []
            for block in response.content:
                if hasattr(block, "text"):
                    description.append(block.text)

            # Return combined description or default message
            if description:
                return " ".join(description)
            return "No description available."

        except APITimeoutError as e:
            logger.error(f"Anthropic API timeout: {str(e)}")
            raise Exception(f"Request timed out: {str(e)}") from e
        except APIConnectionError as e:
            logger.error(f"Anthropic API connection error: {str(e)}")
            raise Exception(f"Connection error: {str(e)}") from e
        except APIError as e:
            logger.error(f"Anthropic API error: {str(e)}")
            raise Exception(f"API error: {str(e)}") from e
        except Exception as e:
            logger.error(
                f"Unexpected error in Anthropic Vision: {str(e)}", exc_info=True
            )
            raise Exception(f"Unexpected error: {str(e)}") from e
39 | prompt: String containing the prompt. 40 | 41 | Returns: 42 | str: Description of the image 43 | 44 | Raises: 45 | Exception: If API call fails 46 | """ 47 | try: 48 | # Get model from environment, default to gpt-4o-mini 49 | model = os.getenv("OPENAI_MODEL", "gpt-4o-mini") 50 | 51 | # Create message content 52 | response = await self.client.chat.completions.create( 53 | model=model, 54 | messages=[ 55 | { 56 | "role": "user", 57 | "content": [ 58 | { 59 | "type": "image_url", 60 | "image_url": { 61 | "url": f"data:{mime_type};base64,{image}" 62 | }, 63 | }, 64 | {"type": "text", "text": prompt}, 65 | ], 66 | } 67 | ], 68 | max_tokens=1024, 69 | ) 70 | 71 | # Extract and return description 72 | return response.choices[0].message.content or "No description available." 73 | 74 | except APITimeoutError as e: 75 | logger.error(f"OpenAI API timeout: {str(e)}") 76 | raise Exception(f"Request timed out: {str(e)}") 77 | except APIConnectionError as e: 78 | logger.error(f"OpenAI API connection error: {str(e)}") 79 | raise Exception(f"Connection error: {str(e)}") 80 | except RateLimitError as e: 81 | logger.error(f"OpenAI API rate limit exceeded: {str(e)}") 82 | raise Exception(f"Rate limit exceeded: {str(e)}") 83 | except APIError as e: 84 | logger.error(f"OpenAI API error: {str(e)}") 85 | raise Exception(f"API error: {str(e)}") 86 | except Exception as e: 87 | logger.error(f"Unexpected error in OpenAI Vision: {str(e)}", exc_info=True) 88 | raise Exception(f"Unexpected error: {str(e)}") 89 | -------------------------------------------------------------------------------- /tests/test_ocr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from PIL import Image, ImageDraw, ImageFont 4 | from src.image_recognition_server.utils.ocr import extract_text_from_image, OCRError 5 | 6 | @pytest.fixture 7 | def text_image(): 8 | """Create a test image with text.""" 9 | # Create a larger image with high contrast 
10 | img = Image.new('RGB', (800, 200), color='white') 11 | d = ImageDraw.Draw(img) 12 | 13 | # Create a simple test string that's easier for OCR 14 | test_string = "TEST" 15 | 16 | # Draw text in large, clear font 17 | d.text((100, 50), test_string, fill='black', font=None) 18 | return img, test_string 19 | 20 | 21 | @pytest.fixture 22 | def empty_image(): 23 | """Create a blank test image.""" 24 | return Image.new('RGB', (100, 100), color='white') 25 | 26 | def test_basic_text_extraction(text_image): 27 | """Test extracting text from an image with clear text.""" 28 | img, expected_text = text_image 29 | result = extract_text_from_image(img) 30 | assert result is not None 31 | assert expected_text in result.upper() # Convert to uppercase for comparison 32 | 33 | def test_empty_image(empty_image): 34 | """Test handling of image with no text.""" 35 | result = extract_text_from_image(empty_image) 36 | assert result is None 37 | 38 | def test_tesseract_not_available(monkeypatch): 39 | """Test error handling when Tesseract isn't accessible.""" 40 | # Create a simple test image 41 | img = Image.new('RGB', (100, 100), color='white') 42 | 43 | # Mock pytesseract to raise an error 44 | def mock_image_to_string(*args, **kwargs): 45 | raise Exception("tesseract is not installed or it's not in your PATH") 46 | 47 | monkeypatch.setattr("pytesseract.image_to_string", mock_image_to_string) 48 | 49 | # Test with ocr_required=False 50 | result = extract_text_from_image(img, ocr_required=False) 51 | assert result is None 52 | 53 | # Test with ocr_required=True 54 | with pytest.raises(OCRError) as exc_info: 55 | extract_text_from_image(img, ocr_required=True) 56 | assert "Tesseract OCR is not installed" in str(exc_info.value) 57 | 58 | def test_custom_tesseract_path(monkeypatch): 59 | """Test using custom Tesseract path via env var.""" 60 | custom_path = r"C:\Program Files\Tesseract-OCR\tesseract.exe" 61 | 62 | # Mock environment variable 63 | monkeypatch.setenv("TESSERACT_CMD", 
custom_path) 64 | 65 | # Mock pytesseract to verify the custom path was set 66 | def mock_image_to_string(*args, **kwargs): 67 | import pytesseract 68 | assert pytesseract.pytesseract.tesseract_cmd == custom_path 69 | return "Hello World" 70 | 71 | monkeypatch.setattr("pytesseract.image_to_string", mock_image_to_string) 72 | 73 | # Create a simple test image 74 | img = Image.new('RGB', (100, 100), color='white') 75 | result = extract_text_from_image(img) 76 | assert result == "Hello World" 77 | 78 | def test_ocr_required_flag(monkeypatch): 79 | """Test both True/False behaviors of ocr_required flag.""" 80 | img = Image.new('RGB', (100, 100), color='white') 81 | 82 | def mock_image_to_string(*args, **kwargs): 83 | return "" # Simulate no text found 84 | 85 | monkeypatch.setattr("pytesseract.image_to_string", mock_image_to_string) 86 | 87 | # Test with ocr_required=False (default) 88 | result = extract_text_from_image(img) 89 | assert result is None 90 | 91 | # Test with ocr_required=True 92 | result = extract_text_from_image(img, ocr_required=True) 93 | assert result is None # Should still be None since empty string is converted to None 94 | -------------------------------------------------------------------------------- /tests/test_server.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | from pathlib import Path 4 | from typing import AsyncGenerator 5 | 6 | import pytest 7 | import pytest_asyncio 8 | from mcp import ClientSession, StdioServerParameters, Tool, stdio_client 9 | 10 | # Test image (a simple 1x1 pixel PNG) 11 | TEST_IMAGE_DATA = base64.b64encode( 12 | bytes.fromhex( 13 | "89504e470d0a1a0a0000000d494844520000000100000001080600000001f15c" 14 | "4a00000009704859730000000ec400000ec401952b0e1b0000001c4944415478" 15 | "9c636460606062626060606060600000000000ffff030000060001f5f7e3c000" 16 | "00000049454e44ae426082" 17 | ) 18 | ).decode() 19 | 20 | 21 | @pytest_asyncio.fixture 22 | async def 
client() -> AsyncGenerator[ClientSession, None]: 23 | """Create a test client connected to the server.""" 24 | server_params = StdioServerParameters( 25 | command="python", 26 | args=["-m", "src.image_recognition_server.server"], 27 | env={ 28 | "ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", "test_key"), 29 | "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "test_key"), 30 | "VISION_PROVIDER": "anthropic", 31 | "LOG_LEVEL": "DEBUG", 32 | }, 33 | ) 34 | 35 | async with stdio_client(server_params) as (read, write): 36 | async with ClientSession(read, write) as session: 37 | await session.initialize() 38 | yield session 39 | 40 | 41 | @pytest.mark.asyncio 42 | async def test_list_tools(client: ClientSession): 43 | """Test that the server exposes the expected tools.""" 44 | tools: list[Tool] = await client.list_tools() 45 | tool_names = {tool.name for tool in tools} 46 | assert "describe_image" in tool_names 47 | assert "describe_image_from_file" in tool_names 48 | 49 | 50 | @pytest.mark.asyncio 51 | async def test_describe_image(client: ClientSession) -> None: 52 | """Test the describe_image tool with a test image.""" 53 | result = await client.call_tool( 54 | "describe_image", 55 | arguments={"image": {"data": TEST_IMAGE_DATA, "mime_type": "image/png"}}, 56 | ) 57 | assert isinstance(result, str) 58 | assert len(result) > 0 59 | 60 | 61 | @pytest.mark.asyncio 62 | async def test_describe_image_from_file(client: ClientSession, tmp_path: Path) -> None: 63 | """Test the describe_image_from_file tool with a test image file.""" 64 | # Create a test image file 65 | image_path = tmp_path / "test.png" 66 | image_data = base64.b64decode(TEST_IMAGE_DATA) 67 | image_path.write_bytes(image_data) 68 | 69 | result = await client.call_tool( 70 | "describe_image_from_file", arguments={"filepath": str(image_path)} 71 | ) 72 | assert isinstance(result, str) 73 | assert len(result) > 0 74 | 75 | 76 | @pytest.mark.asyncio 77 | async def test_invalid_image_data(client: ClientSession) 
-> None: 78 | """Test that the server handles invalid image data appropriately.""" 79 | with pytest.raises(Exception): 80 | await client.call_tool( 81 | "describe_image", 82 | arguments={"image": {"data": "invalid_base64", "mime_type": "image/png"}}, 83 | ) 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_invalid_file_path(client: ClientSession) -> None: 88 | """Test that the server handles invalid file paths appropriately.""" 89 | with pytest.raises(Exception): 90 | await client.call_tool( 91 | "describe_image_from_file", arguments={"filepath": "/nonexistent/path.png"} 92 | ) 93 | --------------------------------------------------------------------------------