├── vlmrun └── hub │ ├── schemas │ ├── __init__.py │ ├── media │ │ └── tv_news.py │ ├── retail │ │ ├── product_catalog.py │ │ └── ecommerce_product_caption.py │ ├── contrib │ │ ├── media │ │ │ ├── nba_game_state.py │ │ │ └── nfl_game_state.py │ │ ├── document │ │ │ ├── india │ │ │ │ ├── pan_card.py │ │ │ │ └── aadhaar_card.py │ │ │ ├── us_passport.py │ │ │ ├── business_card.py │ │ │ ├── bank_check.py │ │ │ ├── request_for_proposal.py │ │ │ └── form_work_order.py │ │ ├── social │ │ │ └── twitter_card.py │ │ ├── accounting │ │ │ └── form_payslip.py │ │ ├── food │ │ │ └── nutrition_facts_label.py │ │ ├── logistics │ │ │ └── bill_of_lading.py │ │ ├── finance │ │ │ └── balance_sheet.py │ │ └── healthcare │ │ │ └── pathology_report.py │ ├── aerospace │ │ └── remote_sensing.py │ ├── healthcare │ │ ├── medical_insurance_card.py │ │ └── hipaa_release.py │ ├── document │ │ ├── us_drivers_license.py │ │ ├── receipt.py │ │ ├── bank_statement.py │ │ ├── resume.py │ │ ├── utility_bill.py │ │ └── invoice.py │ └── accounting │ │ └── w2_form.py │ ├── server │ ├── __init__.py │ ├── app.py │ ├── README.md │ └── routes.py │ ├── version.py │ ├── full-catalog.yaml │ ├── constants.py │ ├── dataset.py │ ├── utils.py │ └── registry.py ├── requirements ├── requirements.extra.txt ├── requirements.build.txt ├── requirements.server.txt ├── requirements.txt └── requirements.test.txt ├── MANIFEST.in ├── tests ├── test_imports.py ├── test_dataset.py ├── test_server.py ├── test_vlmrun.py ├── test_ollama.py ├── conftest.py ├── test_utils.py ├── test_catalog.py ├── test_openai.py ├── benchmarks │ ├── 2025-02-20-bsahane-Qwen2.5-VL-7B-Instruct-Q4_K_M_benxh-instructor-results.md │ ├── 2025-01-10-llama3.2-vision-11b-instructor-results.md │ ├── 2025-01-10-gemini-2.0-flash-exp-instructor-results.md │ └── 2025-01-10-gpt-4o-mini-2024-07-18-instructor-results.md ├── test_registry.py └── test_instructor.py ├── .env.template ├── docs ├── RELEASE.md ├── catalog-spec.yaml ├── CONTRIBUTING.md ├── 
def test_standalone_import():
    """Smoke test: importing ``vlmrun.hub`` must succeed without raising."""
    # The import is the whole test; the bound name is intentionally unused.
    import vlmrun.hub as hub  # noqa: F401
/requirements/requirements.server.txt: -------------------------------------------------------------------------------- 1 | fastapi>=0.115.8 2 | httpx>=0.28.0 3 | uvicorn[standard]>=0.34.0 4 | -------------------------------------------------------------------------------- /vlmrun/hub/full-catalog.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | catalogs: 3 | - ./catalog.yaml 4 | - ./schemas/contrib/catalog.yaml 5 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_BASE_URL=https://api.openai.com/v1 2 | OPENAI_API_KEY= 3 | GEMINI_API_KEY= 4 | VLMRUN_API_KEY= 5 | FIREWORKS_API_KEY= 6 | -------------------------------------------------------------------------------- /docs/RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release 2 | 3 | 1. Checkout the main branch 4 | 2. 
from pathlib import Path

# Directory that contains this module.
VLMRUN_HUB_PATH = Path(__file__).parent

# ``catalog.yaml`` located directly inside the directory above.
VLMRUN_HUB_CATALOG_PATH = VLMRUN_HUB_PATH.joinpath("catalog.yaml")

# ``catalog.yaml`` located under ``schemas/contrib`` inside the directory above.
VLMRUN_HUB_CONTRIB_CATALOG_PATH = VLMRUN_HUB_PATH.joinpath("schemas", "contrib", "catalog.yaml")
def test_dataset_samples():
    """Check every entry of ``VLMRUN_HUB_DATASET`` for consistency.

    Each sample's ``domain`` must equal the key it is stored under, and its
    ``prompt``, ``data`` and ``response_model`` attributes must not be ``None``.
    """
    from vlmrun.hub.dataset import VLMRUN_HUB_DATASET

    # Attributes that must be populated, checked in this order, with the
    # failure message reported for each.
    required_attributes = (
        ("prompt", "Sample prompt must be present"),
        ("data", "Sample data must be present"),
        ("response_model", "Sample response model must be present"),
    )

    for key, entry in VLMRUN_HUB_DATASET.items():
        assert entry.domain == key, "Domain must match the sample domain"
        for attribute, failure_message in required_attributes:
            assert getattr(entry, attribute) is not None, failure_message
class ProductCatalog(BaseModel):
    # Attributes describing a product shown in an image. All four fields are
    # required; `season` and `gender` only accept the literal values listed.

    # Free-text visual summary of the product.
    description: str = Field(
        ..., description="A 2-sentence general visual description of the product embedded as an image."
    )
    # Short category label. The values named in the description are examples,
    # so the text uses "e.g." rather than "i.e." (which would read as an
    # exhaustive list).
    category: str = Field(
        ..., description="One or two-word category of the product (e.g., apparel, accessories, footwear etc)."
    )
    # Target season, restricted to one of four fixed values.
    season: Literal["fall", "spring", "summer", "winter"] = Field(
        ..., description="The season the product is intended for."
    )
    # Target audience, restricted to one of four fixed values.
    gender: Literal["men", "women", "boys", "girls"] = Field(
        ..., description="Gender or audience the product is intended for."
    )
class RetailEcommerceProductCaption(BaseModel):
    # Caption-style description of a retail product. Every field is required.

    # Free-text visual summary of the product.
    description: str = Field(
        default=...,
        description="A 2-sentence general visual description of the product embedded as an image.",
    )
    # Integer score constrained to the inclusive range 0..100.
    rating: int = Field(
        default=...,
        ge=0,
        le=100,
        description="The visual rating or appeal of the product between 0 and 100.",
    )

    # Plain-text product attributes.
    name: str = Field(default=..., description="The name of the product.")
    brand: str = Field(default=..., description="The brand of the product.")
    category: str = Field(
        default=...,
        description="The category of the product, e.g. 'Electronics / E-readers'.",
    )
    price: str = Field(default=..., description="The price of the product.")
    color: str = Field(default=..., description="The color of the product.")
pretty-format-json 30 | - id: trailing-whitespace 31 | - id: check-added-large-files 32 | args: ['--maxkb=100'] 33 | - id: requirements-txt-fixer 34 | -------------------------------------------------------------------------------- /makefiles/Makefile.admin.mk: -------------------------------------------------------------------------------- 1 | VLMRUN_HUB_VERSION := $(shell python -c 'from vlmrun.hub.version import __version__; print(__version__.replace("-", "."))') 2 | PYPI_USERNAME := 3 | PYPI_PASSWORD := 4 | 5 | WHL_GREP_PATTERN := .*\$(VLMRUN_HUB_VERSION).*\.whl 6 | 7 | create-pypi-release-test: 8 | @echo "looking for vlmrun-hub whl file..." 9 | @for file in dist/*; do \ 10 | echo "examining file: $$file"; \ 11 | if [ -f "$$file" ] && echo "$$file" | grep -qE "$(WHL_GREP_PATTERN)"; then \ 12 | echo "Uploading: $$file"; \ 13 | twine upload --repository testpypi "$$file"; \ 14 | fi; \ 15 | done 16 | @echo "Upload completed" 17 | 18 | 19 | create-pypi-release: 20 | @echo "looking for vlmrun-hub whl file..." 
class TeamInfo(BaseModel):
    # One team's entry in a game state; both fields are optional and default to None.
    name: Optional[str] = Field(default=None, description="Name of the team")
    score: Optional[int] = Field(default=None, description="Current score of the team")


class NBAGameState(BaseModel):
    # Snapshot of an NBA game. Every field is optional and defaults to None.

    # Overall summary and participants.
    description: Optional[str] = Field(
        default=None,
        description="Text description of the current game state",
    )
    teams: Optional[List[TeamInfo]] = Field(
        default=None,
        description="List of teams playing in the game",
    )

    # Progress of the game.
    status: Optional[str] = Field(
        default=None,
        description="Current status of the game, e.g., 'in_progress', 'final'",
    )
    quarter: Optional[int] = Field(
        default=None,
        description="Current quarter of the game (1-4, or 5+ for overtime)",
    )
    clock_time: Optional[str] = Field(
        default=None,
        description="Time remaining in the current quarter, e.g., '9:09'",
    )
    shot_clock: Optional[int] = Field(default=None, description="Shot clock time remaining")

    # Broadcast details.
    network: Optional[str] = Field(default=None, description="TV network broadcasting the game")
    is_shown: Optional[bool] = Field(
        default=None,
        description="Whether the game is currently being shown",
    )
class PANCard(BaseModel):
    """PAN (Permanent Account Number) Card schema for extracting information from India's tax identity document."""

    # --- Core PAN information -------------------------------------------------
    # `pan_number` and `name` declare no default and are therefore required.
    pan_number: str = Field(description="10-character alphanumeric PAN (Permanent Account Number)")
    name: str = Field(description="Full name of the PAN card holder")
    father_name: Optional[str] = Field(
        default=None,
        description="Father's name as printed on the PAN card",
    )
    date_of_birth: Optional[str] = Field(
        default=None,
        description="Date of birth of the PAN card holder",
    )

    # --- Security features and visual elements --------------------------------
    has_photo: Optional[bool] = Field(
        default=None,
        description="Whether the card has a photo of the holder",
    )
    has_signature: Optional[bool] = Field(
        default=None,
        description="Whether the card has a signature",
    )
    has_income_tax_logo: Optional[bool] = Field(
        default=None,
        description="Whether the Income Tax Department logo is visible",
    )
    has_govt_of_india_text: Optional[bool] = Field(
        default=None,
        description="Whether 'GOVT. OF INDIA' or similar text is visible",
    )

    # --- Additional information -----------------------------------------------
    # A fresh empty list is created per instance via `default_factory`.
    languages: List[str] = Field(
        description="Languages in which the card information is printed (e.g., Hindi, English, etc.)",
        default_factory=list,
    )
class USPassport(BaseModel):
    """US Passport schema for extracting information from passport documents."""

    # --- Required fields (no default declared) --------------------------------
    family_name: str = Field(description="Family name (surname) of the passport holder")
    given_names: str = Field(description="Given names (first and middle names) of the passport holder")
    document_id: str = Field(description="Passport document identification number")
    expiration_date: date = Field(description="Expiration date of the passport")
    date_of_birth: date = Field(description="Date of birth of the passport holder")
    issue_date: date = Field(description="Issue date of the passport")
    mrz_code: str = Field(description="Machine Readable Zone (MRZ) code from the passport")

    # --- Additional optional fields that might be present ---------------------
    nationality: Optional[str] = Field(
        default=None,
        description="Nationality of the passport holder",
    )
    place_of_birth: Optional[str] = Field(
        default=None,
        description="Place of birth of the passport holder",
    )
    sex: Optional[str] = Field(
        default=None,
        description="Sex of the passport holder (M/F)",
    )
    authority: Optional[str] = Field(
        default=None,
        description="Issuing authority of the passport",
    )
    place_of_issue: Optional[str] = Field(
        default=None,
        description="Place where the passport was issued",
    )
# Main CI workflow: installs the package with its test extras and runs the
# linters and the test-suite on every supported Python version.
name: Main CI

on:
  pull_request:
  push:
    branches: [main]
  workflow_call:

jobs:
  test:
    name: Test Python ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    timeout-minutes: 20
    environment: dev
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12']
      fail-fast: false
    defaults:
      run:
        shell: bash -el {0}
    env:
      # Bump to invalidate the pip cache manually.
      CACHE_NUMBER: 0

    steps:
      - name: Checkout git repo
        uses: actions/checkout@v3

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Caches pip's download cache only (~/.cache/pip); packages still have to
      # be installed on every run.
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }}
          restore-keys: |
            pip-

      # Always install. The previous `if: steps.cache.outputs.cache-hit != 'true'`
      # referenced a step id ("cache") that is not defined in this job, and
      # skipping the install on a cache hit would leave the environment without
      # the package, because only the download cache is restored.
      - name: Install dependencies
        run: |
          which python
          python -m pip install --upgrade pip
          pip install -e '.[test]'

      - name: Quality Check
        uses: pre-commit/action@v3.0.1
        continue-on-error: true

      - name: Run tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          make test
def bump_version(version_file: str, bump_type: str = "patch") -> str:
    """
    Bump the version number in the specified file.

    The file must contain an assignment of the form ``__version__ = "X.Y.Z"``
    (single or double quotes). Only the version number itself is rewritten;
    the quote style and the rest of the file are left untouched.

    Args:
        version_file: Path to the version file
        bump_type: One of 'major', 'minor', or 'patch'

    Returns:
        The new version string

    Raises:
        ValueError: If no version string is found, if the version is not made
            of exactly three integer parts, or if ``bump_type`` is invalid.
    """
    version_path = Path(version_file)
    content = version_path.read_text()

    # Extract current version
    version_match = re.search(r'__version__ = ["\']([^"\']+)["\']', content)
    if not version_match:
        raise ValueError("Could not find version string")

    current_version = version_match.group(1)
    try:
        major, minor, patch = map(int, current_version.split("."))
    except ValueError as err:
        # Covers both a wrong number of parts and non-integer parts.
        raise ValueError(
            f"Unsupported version format: {current_version!r} (expected MAJOR.MINOR.PATCH)"
        ) from err

    # Bump version according to type
    if bump_type == "major":
        major, minor, patch = major + 1, 0, 0
    elif bump_type == "minor":
        minor, patch = minor + 1, 0
    elif bump_type == "patch":
        patch += 1
    else:
        raise ValueError(f"Invalid bump type: {bump_type}")

    new_version = f"{major}.{minor}.{patch}"

    # Splice the new version into the exact span the regex matched. A literal
    # replace of '__version__ = "<old>"' would silently do nothing for a
    # single-quoted version even though the regex above accepts it.
    start, end = version_match.span(1)
    version_path.write_text(content[:start] + new_version + content[end:])

    return new_version


if __name__ == "__main__":
    import sys

    # Usage: python bump_version.py <version_file> [major|minor|patch]
    version_file = sys.argv[1]
    bump_type = sys.argv[2] if len(sys.argv) > 2 else "patch"
    new_version = bump_version(version_file, bump_type)
    print(new_version)
load_dotenv()


VLMRUN_API_KEY = os.getenv("VLMRUN_API_KEY", None)
VLMRUN_BASE_URL = os.getenv("VLMRUN_BASE_URL", None)
# Upper bound (in seconds) for the API call so a stalled connection fails the
# test instead of hanging until the CI job times out.
REQUEST_TIMEOUT_S = 120

# Both variables are needed: the key authenticates the request and the base URL
# is interpolated into the endpoint. Without the base URL the request would be
# sent to the literal string "None/v1/image/generate".
pytestmark = pytest.mark.skipif(
    not (VLMRUN_API_KEY and VLMRUN_BASE_URL),
    reason="This test requires VLMRUN_API_KEY and VLMRUN_BASE_URL to be set",
)


def test_vlmrun_invoice():
    """Send a sample invoice image to the generate endpoint and validate the response against `Invoice`."""
    invoice_url = "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg"
    invoice_image = remote_image(invoice_url)
    domain = "document.invoice"

    json_data = {
        "file_id": invoice_url,
        "images": [encode_image(invoice_image, format="JPEG")],
        "json_schema": Invoice.model_json_schema(),
        "model": "vlm-1",
        "domain": domain,
    }

    response = requests.post(
        f"{VLMRUN_BASE_URL}/v1/image/generate",
        json=json_data,
        headers={"Authorization": f"Bearer {VLMRUN_API_KEY}"},
        timeout=REQUEST_TIMEOUT_S,
    )
    assert response.status_code == 201, f"Response failed: {response.text}"
    json_response = response.json()
    assert isinstance(json_response, dict), "Expected a dict response"
    assert "response" in json_response, "Failed to fetch 'response' key"
    logger.debug(f"Document: {json.dumps(json_response, indent=2)}")

    # The payload under "response" must validate against the Invoice schema.
    invoice_data = json_response["response"]
    invoice = Invoice.model_validate_json(json.dumps(invoice_data))
    logger.debug(invoice)
-------------------------------------------------------------------------------- 1 | name: Schema Request 2 | description: Request a new schema or suggest updates to an existing schema 3 | title: "schema: New schema for `/`" 4 | labels: [schema] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: > 10 | Thank you for contributing to the VLM Run Hub schema! 11 | 12 | This template is specifically for: 13 | - Requesting new schemas 14 | - Suggesting updates to existing schemas 15 | 16 | - type: dropdown 17 | id: schema_action 18 | attributes: 19 | label: Schema Action 20 | description: What action are you requesting? 21 | options: 22 | - New Schema 23 | - Update Existing Schema 24 | validations: 25 | required: true 26 | 27 | - type: textarea 28 | attributes: 29 | label: Schema Motivation 30 | description: > 31 | Please provide a motivation for the new schema or the updates needed for an existing schema. Include: 32 | placeholder: | 33 | E.g.: 34 | 35 | New Schema Request: 36 | - Name: CustomerInvoice 37 | - Fields: id, status, amount, date 38 | 39 | Update Existing Schema: 40 | - Schema: CustomerInvoice 41 | - Changes: Add `due_date` field 42 | 43 | - type: url 44 | attributes: 45 | label: Sample Image 46 | description: Link to an example image to run the schema on 47 | placeholder: "https://example.com/image.png" 48 | optional: true 49 | 50 | - type: textarea 51 | attributes: 52 | label: Sample JSON Output 53 | description: The JSON output of the structured extraction from the sample image 54 | placeholder: | 55 | ```json 56 | { 57 | "id": "123", 58 | "status": "paid", 59 | "amount": 100, 60 | "date": "2024-01-01" 61 | } 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/catalog-spec.yaml: -------------------------------------------------------------------------------- 1 | # Catalog Specification Guidelines 2 | 3 | # Define the API version for the catalog 4 | apiVersion: v1 5 | 6 | # List of schemas to be 
included in the catalog 7 | schemas: 8 | # Each schema entry should define a unique domain, typically hyphenated 9 | # The format is <category>.<schema-name>, or <industry>.<schema-name> for industry-specific schemas 10 | - domain: # Specify the domain name, e.g., document.utility-bill 11 | 12 | # Define the schema path using the format: vlmrun.hub.schemas.<category>.<schema_name>.<SchemaClass> 13 | # Note that this is the fully qualified path to the schema class, and contains underscores instead of hyphens 14 | schema: # e.g., vlmrun.hub.schemas.document.utility_bill.UtilityBill 15 | 16 | # Provide the full prompt to use for the call into the VLM. 17 | prompt: # e.g., "You are a detail-oriented invoice analyst..." 18 | 19 | # Give a brief description or motivation for the schema 20 | description: # e.g., "Comprehensive invoice data extraction system..." 21 | 22 | # Provide a publicly accessible URL to sample data for testing and reference 23 | sample_data: # e.g., "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg" 24 | 25 | # Metadata section for additional information 26 | metadata: 27 | # List the types of inputs supported by the schema 28 | # Currently, only `image`, `video`, and `document` are supported. 29 | supported_inputs: # e.g., ["image", "document"] 30 | 31 | # Add relevant tags for categorization and searchability in the catalog 32 | tags: # e.g., ["document", "finance"] 33 | 34 | # Note: Ensure each schema entry is complete and follows the structure above. 35 | # Use consistent naming conventions and provide meaningful descriptions. 
# Developer entry points for the VLM Run Hub repository.
# Running `make` without arguments prints the help text.
default: help;

# None of the targets below produce a file of the same name; declaring them
# phony keeps them runnable even if a file or directory called e.g. `dist`,
# `test` or `lint` exists in the repository root.
.PHONY: default help clean clean-build clean-pyc clean-test lint test \
	test-benchmark dist bump-version generate-benchmark-html

help:
	@echo "🔥 Official VLM Run Hub"
	@echo ""
	@echo "Usage: make <target>"
	@echo ""
	@echo "Targets:"
	@echo "  clean                    Remove all build, test, coverage and Python artifacts"
	@echo "  clean-build              Remove build artifacts"
	@echo "  clean-pyc                Remove Python file artifacts"
	@echo "  clean-test               Remove test and coverage artifacts"
	@echo "  lint                     Format source code automatically"
	@echo "  test                     Basic testing"
	@echo "  test-benchmark           Benchmark testing"
	@echo "  dist                     Builds source and wheel package"
	@echo "  bump-version             Bump the patch version in vlmrun/hub/version.py"
	@echo "  generate-benchmark-html  Run the benchmark tests in tests/test_instructor.py"
	@echo ""

clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts

clean-build: ## remove build artifacts
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	rm -fr site/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +


clean-pyc: ## remove Python file artifacts
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +

clean-test: ## remove test and coverage artifacts
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache

lint: ## Format source code automatically
	pre-commit run --all-files # Uses pyproject.toml

test: ## Basic CPU testing
	pytest -sv tests

test-benchmark: ## Benchmark testing
	pytest -sv tests -m benchmark

dist: clean ## builds source and wheel package
	python -m build --sdist --wheel
	ls -lh dist

bump-version: ## bump the patch version
	python scripts/bump_version.py vlmrun/hub/version.py patch

generate-benchmark-html: ## run the instructor benchmark tests
	pytest -sv tests/test_instructor.py -m benchmark
class RemoteSensingCategory(str, Enum):
    # Closed set of scene categories. Members are `str` subclasses, and the
    # values use hyphens where the member names use underscores.
    # Declaration order is kept as-is.
    airport = "airport"
    baseball_field = "baseball-field"
    beach = "beach"
    bridge = "bridge"
    cemetery = "cemetery"
    commercial_area = "commercial-area"
    dam = "dam"
    desert = "desert"
    factory = "factory"
    farmlands = "farmlands"
    forest = "forest"
    golf_course = "golf-course"
    greenhouse = "greenhouse"
    hospital = "hospital"
    industrial_area = "industrial-area"
    lake = "lake"
    landfill = "landfill"
    military_base = "military-base"
    mining_site = "mining-site"
    mountain = "mountain"
    oil_field = "oil-field"
    other = "other"
    park = "park"
    parking_lot = "parking-lot"
    port = "port"
    power_plant = "power-plant"
    quarry = "quarry"
    railway_station = "railway-station"
    residential_area = "residential-area"
    resort = "resort"
    river = "river"
    runway = "runway"
    school_campus = "school-campus"
    shopping_mall = "shopping-mall"
    solar_farm = "solar-farm"
    stadium = "stadium"
    storage_tanks = "storage-tanks"
    vineyard = "vineyard"
    water_treatment = "water-treatment"
    wetland = "wetland"
    wind_farm = "wind-farm"


class RemoteSensing(BaseModel):
    # Description, objects, categories and visibility of a satellite image.
    # Every field is optional and defaults to None.
    description: Optional[str] = Field(
        default=None,
        description="2-3 sentence description of the satellite image.",
    )
    objects: Optional[List[str]] = Field(
        default=None,
        description="List of unique objects in the scene",
    )
    categories: Optional[List[RemoteSensingCategory]] = Field(
        default=None,
        description="List of categories that pertain to the scene.",
    )
    is_visible: Optional[bool] = Field(
        default=None,
        description="Whether the land mass is visible from space, or if it is obscured by clouds.",
    )
17 | 18 | 4. **Write Tests**: Ensure your code is well-tested. We use `pytest` for testing. Use `make test` to run all the tests. 19 | 20 | 5. **Submit a Pull Request**: Once your changes are ready, submit a pull request. Make sure to follow the [Schema Guidelines](./SCHEMA-GUIDELINES.md) if your contribution involves Pydantic schema changes. 21 | 22 | ## Schema Contributions 23 | 24 | For contributions involving Pydantic schemas, please refer to the [Schema Guidelines](./SCHEMA-GUIDELINES.md) for detailed instructions on creating and submitting schemas. 25 | 26 | ## Review Process 27 | 28 | - **For Members**: Assign a reviewer to your pull request. Address any feedback and ensure all tests pass before merging. 29 | - **For Non-Members**: A project member will be assigned to review your pull request. Please address their feedback promptly. 30 | 31 | ## PR Checklist 32 | 33 | Before submitting your changes, complete the following steps: 34 | 35 | - Make any relevant changes to the repository. 36 | - Run `make lint` to ensure your code is linted. 37 | - Add any relevant tests under `tests/`, and run `make test` to ensure all tests pass. 38 | - If you are contributing a new schema, follow the [Contributing Schemas](./CONTRIBUTING-SCHEMA.md) guide instead of the general contributing guidelines. 39 | 40 | Thank you for helping us maintain high standards for schema contributions! 
class ProviderService(BaseModel):
    # Provider service and precertification numbers.
    provider_service_number: Optional[str] = Field(default=None, description="Provider service number.")
    precertification_number: Optional[str] = Field(default=None, description="Precertification number.")


class MemberInformation(BaseModel):
    # Member details; `member_name` has no default and is therefore required.
    member_name: str = Field(description="Name of the member.")
    member_id: Optional[str] = Field(default=None, description="Member ID.")
    group_number: Optional[str] = Field(default=None, description="Group number.")


class PharmacyPlan(BaseModel):
    # Pharmacy (Rx) identifiers and help-desk contact.
    rx_bin: Optional[str] = Field(default=None, description="Rx bin.")
    rx_pcn: Optional[str] = Field(default=None, description="Rx pcn.")
    rx_grp: Optional[str] = Field(default=None, description="Rx grp.")
    pharmacy_help_desk: Optional[str] = Field(default=None, description="Pharmacy help desk.")


class InsuranceProvider(BaseModel):
    # Insurance provider name and network.
    provider_name: Optional[str] = Field(default=None, description="Provider name.")
    network: Optional[str] = Field(default=None, description="Network.")


class Coverage(BaseModel):
    # Coverage entries per type of visit; all are free-form strings.
    office_visit: Optional[str] = Field(default=None, description="Office visit.")
    specialist_visit: Optional[str] = Field(default=None, description="Specialist visit.")
    urgent_care: Optional[str] = Field(default=None, description="Urgent care.")
    emergency_room: Optional[str] = Field(default=None, description="Emergency room.")
    inpatient_hospital: Optional[str] = Field(default=None, description="Inpatient hospital.")


class MedicalInsuranceCard(BaseModel):
    # Top-level model grouping the sections above; each section is optional.
    provider_service: Optional[ProviderService] = Field(
        default=None,
        description="Provider service information.",
    )
    member_information: Optional[MemberInformation] = Field(
        default=None,
        description="Member information.",
    )
    pharmacy_plan: Optional[PharmacyPlan] = Field(
        default=None,
        description="Pharmacy plan information.",
    )
    insurance_provider: Optional[InsuranceProvider] = Field(
        default=None,
        description="Insurance provider information.",
    )
    coverage: Optional[Coverage] = Field(
        default=None,
        description="Coverage information.",
    )
height: Optional[str] = Field(None, description="Height of the license holder in the format X'Y\" (e.g. 5'7\")") 46 | weight: Optional[float] = Field(None, description="Weight (in lbs) of the license holder (e.g. 150.5 lbs)") 47 | eye_color: Optional[str] = Field(None, description="Eye color code of the license holder") 48 | hair_color: Optional[str] = Field(None, description="Hair color code of the license holder") 49 | 50 | issue_date: str = Field(..., description="Date the license was issued") 51 | expiration_date: str = Field(..., description="Expiration date of the license") 52 | 53 | license_class: LicenseClass = Field(..., description="Class of the driver's license") 54 | 55 | donor: Optional[bool] = Field(None, description="Indicates if the holder is an organ donor") 56 | veteran: Optional[bool] = Field(None, description="Indicates if the holder is a veteran") 57 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/social/twitter_card.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import List, Literal, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class User(BaseModel): 8 | """Twitter user information.""" 9 | 10 | username: str = Field(..., description="The Twitter handle of the user.") 11 | display_name: str = Field(..., description="The display name of the user.") 12 | 13 | 14 | class Media(BaseModel): 15 | """Media content attached to a tweet.""" 16 | 17 | description: Optional[str] = Field(default=None, description="A description of the media content linked.") 18 | type: Optional[Literal["image", "video", "url"]] = Field( 19 | default=None, description="The type of media (image, video, etc.)." 
20 | ) 21 | 22 | 23 | class Tweet(BaseModel): 24 | """Individual tweet information including content and engagement metrics.""" 25 | 26 | content: Optional[str] = Field(default=None, description="The text content of the tweet.") 27 | created_at: Optional[date] = Field(default=None, description="The timestamp when the tweet was created.") 28 | user: Optional[User] = Field(default=None, description="The user who posted the tweet.") 29 | media: Optional[List[Media]] = Field(default=None, description="List of media items attached to the tweet, if any.") 30 | retweet_count: Optional[int] = Field( 31 | default=None, description="The approximate number of times this tweet has been retweeted." 32 | ) 33 | like_count: Optional[int] = Field( 34 | default=None, description="The approximate number of likes this tweet has received (icon is a heart)." 35 | ) 36 | reply_count: Optional[int] = Field( 37 | default=None, description="The approximate number of replies to this tweet (icon is a reply arrow)." 38 | ) 39 | view_count: Optional[int] = Field( 40 | default=None, 41 | description="The approximate number of views this tweet has received (icon is a vertical bar chart).", 42 | ) 43 | quote_count: Optional[int] = Field( 44 | default=None, description="The approximate number of times this tweet has been quoted." 
@pytest.mark.benchmark
@pytest.mark.skipif(not importlib.util.find_spec("ollama"), reason="Ollama is not installed")
def test_local_ollama():
    """Run every hub sample through a local Ollama model and write a benchmark report.

    The test is skipped when the server at ``OLLAMA_BASE_URL`` cannot be reached
    within the probe timeout. A sample that raises (request error or schema
    validation error) is logged and recorded with a ``None`` response instead of
    aborting the whole run.
    """
    from ollama import chat

    from vlmrun.common.image import encode_image

    try:
        # Bound the probe: without a timeout an unresponsive host would hang the
        # run instead of letting it skip.
        requests.get(f"{OLLAMA_BASE_URL}/api/version", timeout=5)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        pytest.skip("Ollama server is not running")

    results = []
    model = "bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh"  # "llama3.2-vision:11b",
    for sample in VLMRUN_HUB_DATASET.values():
        response_model: Type[BaseModel] = sample.response_model
        # Validated model instance, or None when this sample failed.
        response = None
        try:
            chat_response = chat(
                model=model,
                format=response_model.model_json_schema(),  # Pass in the schema for the response
                messages=[
                    {
                        "role": "user",
                        "content": sample.prompt,
                        # Keeps only the part after the first comma of the encoded string
                        # (presumably dropping a data-URL prefix -- confirm against encode_image).
                        "images": [encode_image(img, format="JPEG").split(",")[1] for img in sample.images],
                    },
                ],
                options={"temperature": 0},  # Set temperature to 0 for more deterministic output
            )
            response = response_model.model_validate_json(chat_response.message.content)
        except Exception as e:
            # Best effort by design: one bad sample must not abort the benchmark.
            logger.error(f"Error processing sample {sample.domain}: {e}")

        results.append(
            BenchmarkResult(
                domain=sample.domain,
                sample=sample.data,
                response_model=sample.response_model.__name__,
                response_json=(
                    response.model_dump_json(indent=2, exclude_none=False) if response is not None else None
                ),
            )
        )
        if response is not None:
            logger.debug(response.model_dump_json(indent=2))

    create_benchmark(results, model, suffix="ollama")
description="Name of the person or entity to whom the check is payable (Pay to the order of)") 26 | amount_numeric: Optional[float] = Field(None, description="Amount of the check in numeric form") 27 | amount_text: Optional[str] = Field(None, description="Amount of the check written out in words") 28 | bank_info: Optional[BankInformation] = Field(None, description="Information about the bank issuing the check") 29 | drawer_name: Optional[str] = Field(None, description="Name of the person writing the check (drawer)") 30 | drawer_address: Optional[Address] = Field(None, description="Address of the person writing the check") 31 | drawer_signature: Optional[bool] = Field(None, description="Whether the check is signed by the drawer") 32 | memo: Optional[str] = Field(None, description="Memo or note written on the check") 33 | micr_line: Optional[str] = Field(None, description="MICR (Magnetic Ink Character Recognition) line at the bottom of the check containing routing and account numbers") 34 | is_void: Optional[bool] = Field(None, description="Whether the check is marked as void") 35 | is_post_dated: Optional[bool] = Field(None, description="Whether the check is post-dated (date is in the future)") 36 | currency: Optional[str] = Field(None, description="Currency of the check") 37 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING-SCHEMA.md: -------------------------------------------------------------------------------- 1 | # Contributing Schemas to VLM Run Hub 2 | 3 | Thank you for your interest in contributing schemas to the VLM Run Hub! To ensure consistency and quality, please follow these guidelines. 4 | 5 | ## Guidelines for Writing a Schema 6 | 7 | Please refer to the [Schema Guidelines](./SCHEMA-GUIDELINES.md) for comprehensive instructions on creating schemas. Key points include: 8 | 9 | - **Use Pydantic’s BaseModel**: All schemas should inherit from Pydantic’s `BaseModel`. 
10 | - **Strongly-Typed Fields**: Ensure all fields are strongly-typed with precise annotations. 11 | - **Field Metadata**: Include descriptions and constraints for each field. 12 | - **Examples**: Provide example data using `json_schema_extra` in the model config (called `Config.schema_extra` in Pydantic v1). 13 | 14 | ## Adding a New Schema 15 | 16 | 1. **Define domain**: Create a new domain for your schema. Check out the [Catalog](../vlmrun/hub/catalog.yaml) for existing domains, e.g. `document.invoice`, `document.receipt`, `accounting.form-payslip`, `healthcare.pathology-report`, `real-estate.lease-agreement`. If the document is a form, the second part of the domain should start with `form-`. 17 | 2. **Create a New Schema File**: Place your schema in `schemas/contrib/<industry>/<use_case>.py`, following the appropriate industry and use case structure defined in the [Schema Guidelines](./SCHEMA-GUIDELINES.md). 18 | 19 | 3. **Add Tests**: Include tests for your schema in `tests/test_schemas.py`. 20 | 21 | 4. **Add to the contrib Catalog**: Add your schema to the [`vlmrun/hub/schemas/contrib/catalog.yaml`](../vlmrun/hub/schemas/contrib/catalog.yaml) file in the `schemas` section, and test it with `pytest -sv tests/test_instructor.py --domain="<domain>"`. 22 | 23 | 5. **Submit a Pull Request**: Once your schema is complete and tested, submit a pull request with the [`schema-request`](../.github/PULL_REQUEST_TEMPLATE/schema-request.yaml) template for review. You can take a look at a previous PR for reference. 24 | 25 | ## PR Checklist 26 | 27 | Before submitting your schema: 28 | 29 | - Follow the [Schema Review Checklist](./SCHEMA-GUIDELINES.md#✅-schema-review-checklist) 30 | - Add the schema to the [`vlmrun/hub/schemas/contrib/catalog.yaml`](../vlmrun/hub/schemas/contrib/catalog.yaml) file, following the [Adding a New Schema to the Hub](./SCHEMA-GUIDELINES.md#👩‍💻-adding-a-new-schema-to-the-hub) section 31 | - Make sure the sample image is publicly accessible. 32 | - Test the schema with `pytest -sv tests/test_instructor.py --domain="<domain>"`.
33 | 34 | Thank you for helping us maintain high standards for schema contributions! 35 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from pathlib import Path 4 | 5 | import pytest 6 | from loguru import logger 7 | 8 | 9 | def pytest_addoption(parser): 10 | parser.addoption("--domain", action="store", default="document.us-drivers-license", help="Domain to test") 11 | parser.addoption("--provider", action="store", default="openai", help="Provider to use for testing") 12 | parser.addoption("--model", action="store", default="gpt-4o-mini-2024-07-18", help="Model to use for testing") 13 | 14 | 15 | @pytest.fixture 16 | def domain_arg(request): 17 | """Domain fixture for testing""" 18 | return request.config.getoption("--domain") 19 | 20 | 21 | @pytest.fixture 22 | def provider_arg(request): 23 | """Provider fixture for testing""" 24 | return request.config.getoption("--provider") 25 | 26 | 27 | @pytest.fixture 28 | def model_arg(request): 29 | """Model fixture for testing""" 30 | return request.config.getoption("--model") 31 | 32 | 33 | BenchmarkResult = namedtuple("BenchmarkResult", ["domain", "sample", "response_model", "response_json"]) 34 | 35 | 36 | def create_benchmark(results: list[BenchmarkResult], model: str, suffix: str): 37 | # Write the results to a pandas dataframe -> HTML 38 | # render the data_url in a new column 39 | BENCHMARK_DIR = Path(__file__).parent / "benchmarks" 40 | BENCHMARK_DIR.mkdir(parents=True, exist_ok=True) 41 | date_str = datetime.now().strftime("%Y-%m-%d") 42 | benchmark_path = BENCHMARK_DIR / f"{date_str}-{model}-{suffix}-results.md".replace("/", "-") 43 | 44 | # Render the results in markdown 45 | def parse_json(x): 46 | return x.replace("\n", "
") if x is not None else "❌" 47 | 48 | markdown_str = f"## Benchmark Results (model={model}, date={date_str})\n\n" 49 | markdown_str += """ 50 | 51 | 52 | 53 | 54 | 55 | 56 | """ 57 | for result in results: 58 | markdown_str += "" 59 | markdown_str += f"\n" 60 | markdown_str += f"\n" 61 | markdown_str += f"\n" 62 | markdown_str += "\n".format(x=parse_json(result.response_json)) 63 | markdown_str += "" 64 | markdown_str += "\n
Domain Response Model Sample Response JSON
{result.domain} {result.response_model}
{x}
" 65 | 66 | with benchmark_path.open("w") as f: 67 | f.write(markdown_str) 68 | logger.debug(f"Results written to {benchmark_path}") 69 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/document/india/aadhaar_card.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class Address(BaseModel): 8 | """Address structure found on Aadhaar PVC cards.""" 9 | 10 | full_address: str = Field(..., description="Complete address as printed on the card") 11 | pin_code: Optional[str] = Field(None, description="PIN code (postal code) extracted from the address") 12 | state: Optional[str] = Field(None, description="State extracted from the address") 13 | district: Optional[str] = Field(None, description="District extracted from the address") 14 | 15 | 16 | class CardSide(Enum): 17 | FRONT = "front" 18 | BACK = "back" 19 | BOTH = "both" 20 | UNKNOWN = "unknown" 21 | 22 | 23 | class AadhaarCard(BaseModel): 24 | """Aadhaar PVC Card schema for extracting information from India's national identity document.""" 25 | 26 | # Metadata about the extraction 27 | detected_side: CardSide = Field( 28 | ..., description="Which side of the Aadhaar card is visible in the image (front/back/both/unknown)" 29 | ) 30 | 31 | # Front side information 32 | aadhaar_number: Optional[str] = Field( 33 | None, description="12-digit unique Aadhaar identification number (may be partially masked)" 34 | ) 35 | name: Optional[str] = Field(None, description="Full name of the Aadhaar card holder") 36 | date_of_birth: Optional[str] = Field(None, description="Date of birth of the Aadhaar card holder") 37 | gender: Optional[str] = Field(None, description="Gender of the Aadhaar card holder (Male/Female/Transgender)") 38 | 39 | # Back side information 40 | address: Optional[Address] = Field(None, 
description="Address details as printed on the back of the card") 41 | 42 | # Security features and other elements 43 | has_photo: Optional[bool] = Field(None, description="Whether the card has a photo of the holder (front side)") 44 | has_qr_code: Optional[bool] = Field(None, description="Whether the card has a QR code") 45 | has_emblem: Optional[bool] = Field(None, description="Whether the card has the Government of India emblem") 46 | has_uidai_logo: Optional[bool] = Field(None, description="Whether the UIDAI logo is visible") 47 | 48 | # Additional information 49 | issue_date: Optional[str] = Field(None, description="Date of issue if visible on the card") 50 | print_date: Optional[str] = Field(None, description="Date when the PVC card was printed, if visible") 51 | 52 | # Language information 53 | languages: List[str] = Field( 54 | default_factory=list, 55 | description="Languages in which the card information is printed (e.g., Hindi, English, etc.)", 56 | ) 57 | -------------------------------------------------------------------------------- /vlmrun/hub/server/routes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | from typing import List, Optional 4 | 5 | from fastapi import APIRouter, HTTPException 6 | from pydantic import BaseModel, Field 7 | 8 | from vlmrun.hub.registry import registry 9 | from vlmrun.hub.version import __version__ 10 | 11 | router = APIRouter() 12 | 13 | 14 | class HubInfoResponse(BaseModel): 15 | version: str = Field(..., description="The version of the hub") 16 | 17 | 18 | class HubDomainInfo(BaseModel): 19 | domain: str = Field(..., description="The domain identifier") 20 | description: Optional[str] = Field(None, description="Description of the schema's purpose") 21 | supported_inputs: Optional[List[str]] = Field(None, description="List of supported input types") 22 | tags: Optional[List[str]] = Field(None, description="List of tags for the schema") 23 | 
@router.post("/schema", response_model=HubSchemaResponse)
async def get_domain_schema(request: HubSchemaRequest) -> HubSchemaResponse:
    """Return the JSON schema registered for a domain, with its version and hash.

    Args:
        request: Request body carrying the ``domain`` whose schema is wanted.

    Returns:
        The model's JSON schema, the hub ``__version__``, and the first 8 hex
        characters of the SHA-256 of the schema serialized with sorted keys.

    Raises:
        HTTPException: 404 when the registry lookup raises ``KeyError``; 500 for
            any other failure while looking up, building or hashing the schema.
    """
    # Only the lookup is guarded by the 404 branch, so a KeyError raised later
    # (e.g. while generating the schema) is reported as a server error rather
    # than as "schema not found".
    try:
        schema_class = registry[request.domain]
    except KeyError:
        raise HTTPException(status_code=404, detail=f"Schema '{request.domain}' not found") from None
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    try:
        json_schema = schema_class.model_json_schema()
        # sort_keys fixes the serialization, and therefore the hash, regardless of key order.
        canonical = json.dumps(json_schema, sort_keys=True)
        schema_hash = hashlib.sha256(canonical.encode()).hexdigest()[:8]

        return HubSchemaResponse(
            json_schema=json_schema,
            schema_version=__version__,
            schema_hash=schema_hash,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@dataclass
class HubSample:
    """One catalog sample: a domain, its response schema, a prompt and sample data.

    ``data`` is a single local path or URL. The ``images`` property loads it as a
    list of PIL images, dispatching on the file extension.
    """

    domain: str
    """The domain / identifier of the sample"""
    response_model: Type[BaseModel]
    """The response model to use for the sample"""
    prompt: str
    """The prompt to use for the sample"""
    data: str
    """Path or URL of the sample file (.pdf, .jpg, .jpeg, .png or .webp)"""

    def _handle_image(self, image: ImageType) -> Image.Image:
        """Load one image from a local path or an http(s) URL.

        Raises:
            ValueError: If ``image`` is neither a ``str`` nor a ``Path``.
        """
        # ImageType allows Path, so local Path objects are opened directly.
        if isinstance(image, Path):
            return Image.open(image)
        if isinstance(image, str):
            if image.startswith(("http://", "https://")):
                return remote_image(image)
            return Image.open(image)
        raise ValueError(f"Invalid image type: {type(image)}")

    def _handle_pdf(self, url: PDFType) -> List[Image.Image]:
        """Load a PDF from a local path or an http(s) URL as a list of images.

        Raises:
            ValueError: If ``url`` does not end in ``.pdf`` (case-insensitive).
        """
        # Normalize first: PDFType allows Path, which has no str methods.
        location = str(url)
        if not location.lower().endswith(".pdf"):
            raise ValueError(f"Invalid PDF path or URL: {location}")

        if location.startswith(("http://", "https://")):
            # Presumably downloads to a local file and returns its path -- confirm
            # against vlmrun.common.utils.download_artifact.
            path: Path = download_artifact(location, format="file")
        else:
            path = Path(location)
        return [p.image for p in pdf_images(path, dpi=72)]

    def _handle_url(self, url: str) -> List[Image.Image]:
        """Dispatch ``url`` to the PDF or image loader based on its extension.

        Raises:
            ValueError: If the extension is not one of the supported ones.
        """
        # Compare on a lowered copy so ".PDF" / ".JPG" are accepted; the original
        # value is what gets passed on to the loaders.
        lowered = str(url).lower()
        if lowered.endswith(".pdf"):
            return self._handle_pdf(url)
        if lowered.endswith((".jpg", ".jpeg", ".png", ".webp")):
            return [self._handle_image(url)]
        raise ValueError(f"Invalid data extension: {url}")

    @property
    def images(self) -> List[Image.Image]:
        """The sample data loaded as a list of images (reloaded on each access)."""
        return self._handle_url(self.data)
Optional[date] = Field(None, description="Date when employee joined the company") 25 | 26 | 27 | class PayPeriod(BaseModel): 28 | period: Optional[str] = Field(None, description="Pay period (e.g., 'August 2021')") 29 | days_worked: Optional[int] = Field(None, description="Number of days worked in the period") 30 | 31 | 32 | class EarningsItem(BaseModel): 33 | description: str = Field(..., description="Description of the earnings (e.g., 'Basic', 'Incentive Pay')") 34 | amount: Optional[float] = Field(None, description="Amount for this earnings type") 35 | 36 | 37 | class DeductionItem(BaseModel): 38 | description: str = Field(..., description="Description of the deduction (e.g., 'Provident Fund', 'Tax')") 39 | amount: Optional[float] = Field(None, description="Amount deducted") 40 | 41 | 42 | class Payslip(BaseModel): 43 | employer: Optional[EmployerInfo] = Field(None, description="Information about the employer") 44 | employee: Optional[EmployeeInfo] = Field(None, description="Information about the employee") 45 | pay_period: Optional[PayPeriod] = Field(None, description="Pay period details") 46 | earnings: Optional[List[EarningsItem]] = Field(None, description="List of earnings items") 47 | deductions: Optional[List[DeductionItem]] = Field(None, description="List of deduction items") 48 | total_earnings: Optional[float] = Field(None, description="Total earnings amount") 49 | total_deductions: Optional[float] = Field(None, description="Total deductions amount") 50 | net_pay: Optional[float] = Field(None, description="Net pay amount after all deductions") 51 | currency: Optional[str] = Field(None, description="Currency code (e.g., 'USD', 'EUR', 'INR')") 52 | net_pay_in_words: Optional[str] = Field(None, description="Net pay amount expressed in words") 53 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import 
def test_jsonschema_to_model_with_registry_schemas():
    """Test that jsonschema_to_model works with all schemas in the registry."""
    registry.load_schemas()

    for domain, schema_class in registry.schemas.items():
        json_schema = schema_class.model_json_schema()

        # Guard only the conversion. The field comparison stays outside the try so
        # a mismatch is reported as its own assertion failure instead of being
        # relabelled as "Failed to process schema".
        try:
            generated_model = jsonschema_to_model(json_schema)
        except Exception as e:
            pytest.fail(f"Failed to process schema for {domain}: {str(e)}")

        original_fields = set(schema_class.model_fields)
        generated_fields = set(generated_model.model_fields)

        assert original_fields == generated_fields, (
            f"Field mismatch for {domain}:\n"
            f"Original fields: {original_fields}\n"
            f"Generated fields: {generated_fields}\n"
            f"Missing: {original_fields - generated_fields}\n"
            f"Extra: {generated_fields - original_fields}"
        )
[tool.pytest.ini_options]
# pytest reads its pyproject.toml settings from this table only; a bare
# `[pytest]` table is an ini-file convention and is ignored here, so the
# live-log options are kept in this table alongside addopts and markers.
log_cli = true
log_cli_level = "INFO"
addopts = "--show-capture=no -sv -m 'not (skip) and not (benchmark)' -p no:warnings"
markers = [
    "skip: Skip test",
    "benchmark: Benchmark tests",
]
72 | lint.select = [ 73 | "E", # pycodestyle errors 74 | "W", # pycodestyle warnings 75 | "F", # pyflakes 76 | "I", # isort 77 | "C", # flake8-comprehensions 78 | "B", # flake8-bugbear 79 | ] 80 | lint.ignore = [ 81 | "E501", # E501: Line too long 82 | "C901", # C901: Function is too complex 83 | "B008", # do not perform function calls in argument defaults 84 | "B904", # B904: check for raise from None 85 | ] 86 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Python Publish 2 | 3 | on: 4 | push: 5 | paths: 6 | - "vlmrun/hub/version.py" 7 | branches: 8 | - main 9 | 10 | env: 11 | CACHE_NUMBER: 1 # increase to reset cache manually 12 | 13 | jobs: 14 | test: 15 | name: Test 16 | runs-on: ubuntu-latest 17 | timeout-minutes: 20 18 | environment: dev 19 | steps: 20 | - name: Checkout git repo 21 | uses: actions/checkout@v3 22 | 23 | - uses: actions/setup-python@v5 24 | with: 25 | python-version: "3.10" 26 | 27 | - uses: actions/cache@v4 28 | with: 29 | path: ~/.cache/pip 30 | key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }} 31 | restore-keys: | 32 | pip- 33 | 34 | - name: Install dependencies 35 | if: steps.cache.outputs.cache-hit != 'true' 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install -e '.[test]' 39 | 40 | - name: Quality Check 41 | uses: pre-commit/action@v3.0.1 42 | continue-on-error: true 43 | 44 | - name: Run tests 45 | env: 46 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 47 | VLMRUN_BASE_URL: ${{ vars.VLMRUN_BASE_URL }} 48 | VLMRUN_API_KEY: ${{ secrets.VLMRUN_API_KEY }} 49 | run: | 50 | make test 51 | 52 | publish: 53 | name: Publish 54 | runs-on: ubuntu-latest 55 | timeout-minutes: 20 56 | environment: prod 57 | needs: test 58 | steps: 59 | - name: Checkout git repo 60 | 
uses: actions/checkout@v3 61 | with: 62 | fetch-depth: 0 63 | token: ${{ secrets.GH_TOKEN }} 64 | 65 | - uses: actions/setup-python@v5 66 | with: 67 | python-version: "3.10" 68 | 69 | - uses: actions/cache@v4 70 | with: 71 | path: ~/.cache/pip 72 | key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }} 73 | restore-keys: | 74 | pip- 75 | 76 | - name: Install dependencies 77 | if: steps.cache.outputs.cache-hit != 'true' 78 | run: | 79 | python -m pip install --upgrade pip 80 | pip install -e '.[test,build]' 81 | 82 | - name: Bump version 83 | if: success() 84 | run: | 85 | version=$(grep -oP '__version__ = "\K[^"]+' vlmrun/hub/version.py) 86 | echo "Current version: ${version}" 87 | 88 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 89 | git config --local user.name "github-actions[bot]" 90 | 91 | git tag -a "v${version}" -m "Version ${version}" 92 | git push origin main 93 | git push origin "v${version}" 94 | 95 | - name: Build package 96 | run: | 97 | python -m build 98 | 99 | - name: Publish to PyPI 100 | uses: pypa/gh-action-pypi-publish@release/v1 101 | with: 102 | password: ${{ secrets.PYPI_TOKEN }} 103 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/food/nutrition_facts_label.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class NutrientInfo(BaseModel): 7 | amount: Optional[float] = Field(None, description="The amount of the nutrient") 8 | unit: Optional[str] = Field(None, description="The unit of measurement (g, mg, mcg, etc.)") 9 | daily_value_percent: Optional[float] = Field(None, description="The percentage of daily value") 10 | 11 | 12 | class NutritionFactsLabel(BaseModel): 13 | serving_size: Optional[str] = Field(None, 
description="The serving size, typically represented as a quantity with unit") 14 | servings_per_container: Optional[float] = Field(None, description="Number of servings per container") 15 | calories: Optional[int] = Field(None, description="Total calories per serving") 16 | 17 | total_fat: Optional[NutrientInfo] = Field(None, description="Total fat content per serving") 18 | saturated_fat: Optional[NutrientInfo] = Field(None, description="Saturated fat content per serving") 19 | trans_fat: Optional[NutrientInfo] = Field(None, description="Trans fat content per serving") 20 | polyunsaturated_fat: Optional[NutrientInfo] = Field(None, description="Polyunsaturated fat content per serving") 21 | monounsaturated_fat: Optional[NutrientInfo] = Field(None, description="Monounsaturated fat content per serving") 22 | 23 | cholesterol: Optional[NutrientInfo] = Field(None, description="Cholesterol content per serving") 24 | sodium: Optional[NutrientInfo] = Field(None, description="Sodium content per serving") 25 | 26 | total_carbohydrate: Optional[NutrientInfo] = Field(None, description="Total carbohydrate content per serving") 27 | dietary_fiber: Optional[NutrientInfo] = Field(None, description="Dietary fiber content per serving") 28 | total_sugars: Optional[NutrientInfo] = Field(None, description="Total sugars content per serving") 29 | added_sugars: Optional[NutrientInfo] = Field(None, description="Added sugars content per serving") 30 | sugar_alcohols: Optional[NutrientInfo] = Field(None, description="Sugar alcohols content per serving") 31 | 32 | protein: Optional[NutrientInfo] = Field(None, description="Protein content per serving") 33 | 34 | # Vitamins and minerals 35 | vitamin_d: Optional[NutrientInfo] = Field(None, description="Vitamin D content per serving") 36 | calcium: Optional[NutrientInfo] = Field(None, description="Calcium content per serving") 37 | iron: Optional[NutrientInfo] = Field(None, description="Iron content per serving") 38 | potassium: 
Optional[NutrientInfo] = Field(None, description="Potassium content per serving") 39 | vitamin_a: Optional[NutrientInfo] = Field(None, description="Vitamin A content per serving") 40 | vitamin_c: Optional[NutrientInfo] = Field(None, description="Vitamin C content per serving") 41 | 42 | # Additional nutrients that might be present 43 | additional_nutrients: Optional[Dict[str, NutrientInfo]] = Field(None, description="Additional nutrients not covered by standard fields") 44 | 45 | # Additional information 46 | ingredients: Optional[str] = Field(None, description="List of ingredients") 47 | allergens: Optional[List[str]] = Field(None, description="List of allergens") 48 | manufacturer: Optional[str] = Field(None, description="Manufacturer or distributor of the product") 49 | product_name: Optional[str] = Field(None, description="Name of the product") 50 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/document/receipt.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class Address(BaseModel): 8 | street: Optional[str] = Field(None, description="Street address") 9 | city: Optional[str] = Field(None, description="City") 10 | state: Optional[str] = Field(None, description="State") 11 | postal_code: Optional[str] = Field(None, description="Postal code") 12 | country: Optional[str] = Field(None, description="Country") 13 | 14 | 15 | class Item(BaseModel): 16 | description: str = Field(..., description="Description or name of the item") 17 | quantity: Optional[float] = Field(None, description="Quantity of the item") 18 | unit_price: Optional[float] = Field(None, description="Unit price of the item") 19 | total_price: Optional[float] = Field(None, description="Total price of the item") 20 | 21 | 22 | class PaymentMethod(BaseModel): 23 | type: str = 
Field(..., description="Type of payment (e.g., cash, credit card, debit card)") 24 | card_last_4: Optional[str] = Field(None, description="Last 4 digits of the card if applicable") 25 | card_type: Optional[str] = Field(None, description="Type of card if applicable") 26 | 27 | 28 | class Receipt(BaseModel): 29 | receipt_id: Optional[str] = Field(None, description="Unique receipt identifier") 30 | transaction_date: Optional[datetime] = Field(None, description="Date and time of the transaction") 31 | 32 | merchant_name: Optional[str] = Field(None, description="Name of the merchant") 33 | merchant_address: Optional[Address] = Field(None, description="Address of the merchant") 34 | merchant_phone: Optional[str] = Field(None, description="Phone number of the merchant") 35 | 36 | cashier_name: Optional[str] = Field(None, description="Name of the cashier") 37 | register_number: Optional[str] = Field(None, description="Register or POS terminal number") 38 | 39 | customer_name: Optional[str] = Field(None, description="Name of the customer if provided") 40 | customer_id: Optional[str] = Field(None, description="Customer ID or loyalty number if applicable") 41 | 42 | items: List[Item] = Field(..., description="Items purchased") 43 | 44 | subtotal: Optional[float] = Field(None, description="Subtotal of the purchase") 45 | tax: Optional[float] = Field(None, description="Tax amount") 46 | total: float = Field(..., description="Total amount of the purchase") 47 | currency: str = Field(..., description="Currency of the transaction") 48 | 49 | payment_method: PaymentMethod = Field(..., description="Method of payment") 50 | 51 | discount_amount: Optional[float] = Field(None, description="Amount of discount applied") 52 | discount_description: Optional[str] = Field(None, description="Description of the discount") 53 | 54 | tip_amount: Optional[float] = Field(None, description="Tip amount if applicable") 55 | 56 | return_policy: Optional[str] = Field(None, description="Return policy 
information") 57 | 58 | barcode: Optional[str] = Field(None, description="Barcode or QR code data if present") 59 | 60 | additional_charges: Optional[List[Dict]] = Field(None, description="Any additional charges (e.g., service fees)") 61 | 62 | notes: Optional[str] = Field(None, description="Any additional notes or comments") 63 | others: Optional[Dict[str, Any]] = Field( 64 | None, description="Other information on the receipt not captured by other fields" 65 | ) 66 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/document/request_for_proposal.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class ContactPerson(BaseModel): 8 | name: Optional[str] = Field(None, description="Name of the contact person for the RFP") 9 | title: Optional[str] = Field(None, description="Title or position of the contact person") 10 | email: Optional[str] = Field(None, description="Email address of the contact person") 11 | phone: Optional[str] = Field(None, description="Phone number of the contact person") 12 | 13 | 14 | class Responsibility(BaseModel): 15 | description: str = Field(..., description="Description of the contractor responsibility") 16 | 17 | 18 | class EvaluationCriterion(BaseModel): 19 | description: str = Field(..., description="Description of the evaluation criterion") 20 | weight: Optional[float] = Field(None, description="Weight or importance of this criterion (if specified)") 21 | 22 | 23 | class RFP(BaseModel): 24 | """Request for Proposal (RFP) schema for extracting information from RFP documents.""" 25 | 26 | title: Optional[str] = Field(None, description="Title of the Request for Proposal") 27 | 28 | submission_deadline: Optional[date] = Field(None, description="Deadline date for proposal submissions") 29 | 30 | governing_law: Optional[str] = 
Field(None, description="Governing law or jurisdiction that applies to the contract") 31 | 32 | duration_of_contract: Optional[str] = Field(None, description="Overall period of performance for the contract") 33 | 34 | budget_cost_estimate: Optional[str] = Field(None, description="Estimated budget or cost range for the project") 35 | 36 | rfp_contact_person: Optional[ContactPerson] = Field( 37 | None, description="Contact person information for inquiries about the RFP" 38 | ) 39 | 40 | responsibilities_of_contractor: Optional[List[Responsibility]] = Field( 41 | None, description="List of responsibilities expected from the contractor" 42 | ) 43 | 44 | evaluation_criteria: Optional[List[EvaluationCriterion]] = Field( 45 | None, description="Criteria used to evaluate and score proposals" 46 | ) 47 | 48 | proposal_submission_location: Optional[str] = Field( 49 | None, description="Physical or electronic location where proposals should be submitted" 50 | ) 51 | 52 | insurance_requirements: Optional[str] = Field(None, description="Insurance requirements for the contractor") 53 | 54 | project_timeline: Optional[str] = Field(None, description="Expected timeline for project completion") 55 | 56 | eligibility_requirements: Optional[str] = Field( 57 | None, description="Requirements that bidders must meet to be eligible" 58 | ) 59 | 60 | proposal_format_requirements: Optional[str] = Field( 61 | None, description="Required format, structure, or content for submitted proposals" 62 | ) 63 | 64 | question_submission_deadline: Optional[date] = Field( 65 | None, description="Deadline for potential bidders to submit questions" 66 | ) 67 | 68 | pre_proposal_conference_details: Optional[str] = Field( 69 | None, description="Details about any pre-proposal meetings or conferences" 70 | ) 71 | 72 | issuing_organization: Optional[str] = Field(None, description="Organization that issued the RFP") 73 | 74 | amendment_history: Optional[List[str]] = Field( 75 | None, description="History of 
amendments or changes to the original RFP" 76 | ) 77 | -------------------------------------------------------------------------------- /tests/test_catalog.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pydantic import BaseModel 5 | 6 | from vlmrun.hub.registry import SchemaCatalogYaml 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "catalog_path", 11 | [ 12 | Path(__file__).parent.parent / "vlmrun" / "hub" / "catalog.yaml", 13 | Path(__file__).parent.parent / "vlmrun" / "hub" / "schemas" / "contrib" / "catalog.yaml", 14 | ], 15 | ) 16 | def test_catalog_yaml(catalog_path): 17 | """Test that catalog.yaml is valid and follows the expected structure.""" 18 | assert catalog_path.exists(), "catalog.yaml file not found" 19 | 20 | # Load the catalog 21 | catalog = SchemaCatalogYaml.from_yaml(catalog_path) 22 | 23 | # Basic validation 24 | assert catalog.apiVersion == "v1", "API version must be v1" 25 | assert len(catalog.schemas) > 0, "Catalog must contain at least one schema" 26 | 27 | # Schema-specific validation 28 | for entry in catalog.schemas: 29 | # Domain format validation 30 | assert "." 
in entry.domain, "Domain must be in format: category.name" 31 | category, name = entry.domain.split(".", 1) 32 | assert category and name, "Both category and name must be non-empty" 33 | 34 | # Schema path validation 35 | assert entry.schema_path.startswith("vlmrun.hub.schemas."), "Schema must be in vlmrun.hub.schemas package" 36 | 37 | # Version format validation (basic semver check) 38 | if entry.version: 39 | version_parts = entry.version.split(".") 40 | assert len(version_parts) == 3, "Version must follow semver format (X.Y.Z)" 41 | assert all(part.isdigit() for part in version_parts), "Version parts must be numeric" 42 | 43 | # Metadata validation 44 | if entry.metadata: 45 | if entry.metadata.supported_inputs: 46 | assert isinstance(entry.metadata.supported_inputs, list), "Supported inputs must be a list" 47 | assert len(entry.metadata.supported_inputs) > 0, "Must have at least one supported input" 48 | if entry.metadata.tags: 49 | assert isinstance(entry.metadata.tags, list), "Tags must be a list" 50 | assert len(entry.metadata.tags) > 0, "Must have at least one tag" 51 | assert all(isinstance(tag, str) for tag in entry.metadata.tags), "All tags must be strings" 52 | 53 | # Content validation 54 | assert len(entry.prompt) >= 10, "Prompt must be descriptive (min 10 chars)" 55 | assert len(entry.description) >= 20, "Description must be detailed (min 20 chars)" 56 | 57 | # Dynamic schema validation 58 | try: 59 | schema_class = entry.schema_class 60 | assert issubclass(schema_class, BaseModel), f"Schema {entry.schema} must be a Pydantic model" 61 | except Exception as e: 62 | pytest.fail(f"Unable to import {entry.schema}: {e}") 63 | 64 | 65 | def test_catalog_yaml_with_refs(): 66 | """Test that catalog.yaml with refs is valid and follows the expected structure.""" 67 | catalog_path = Path(__file__).parent.parent / "vlmrun" / "hub" / "full-catalog.yaml" 68 | assert catalog_path.exists(), "full-catalog.yaml file not found" 69 | 70 | # Load the catalog 71 | 
catalog = SchemaCatalogYaml.from_yaml(catalog_path) 72 | 73 | # Basic validation 74 | assert catalog.apiVersion == "v1", "API version must be v1" 75 | 76 | n_schemas = len( 77 | SchemaCatalogYaml.from_yaml(Path(__file__).parent.parent / "vlmrun" / "hub" / "catalog.yaml").schemas 78 | ) + len( 79 | SchemaCatalogYaml.from_yaml( 80 | Path(__file__).parent.parent / "vlmrun" / "hub" / "schemas" / "contrib" / "catalog.yaml" 81 | ).schemas 82 | ) 83 | assert len(catalog.schemas) == n_schemas, "Catalog must contain the correct number of schemas" 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .env.dev 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/ 164 | /helpers 165 | *.env.* 166 | .DS_Store 167 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/healthcare/hipaa_release.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class HealthInformation(BaseModel): 8 | full_disclosure: Optional[bool] = Field( 9 | default=None, description="Indicates if the full health record is disclosed." 10 | ) 11 | excluded_information: Optional[List[str]] = Field( 12 | default=None, description="Types of health records excluded from disclosure." 13 | ) 14 | other_exclusions: Optional[List[str]] = Field( 15 | default=None, description="Additional exclusions specified by the patient." 16 | ) 17 | disclosure_format: Optional[str] = Field( 18 | default=None, description="The preferred format for disclosing the health records." 19 | ) 20 | 21 | 22 | class Recipient(BaseModel): 23 | name: Optional[str] = Field(default=None, description="Name of the recipient.") 24 | organization: Optional[str] = Field(default=None, description="Organization name if applicable.") 25 | address: Optional[str] = Field(default=None, description="Recipient's address.") 26 | 27 | 28 | class AuthorizationDuration(BaseModel): 29 | start_date: Optional[date] = Field(default=None, description="Start date of authorization.") 30 | end_date: Optional[date] = Field(default=None, description="End date of authorization.") 31 | all_time: Optional[bool] = Field( 32 | default=None, description="Indicates if authorization applies to all past, present, and future periods." 
33 | ) 34 | event_based: Optional[str] = Field(default=None, description="Event upon which authorization expires.") 35 | 36 | 37 | class RevocationContact(BaseModel): 38 | name: Optional[str] = Field(default=None, description="Name of person handling revocation.") 39 | organization: Optional[str] = Field(default=None, description="Organization responsible for processing revocation.") 40 | address: Optional[str] = Field(default=None, description="Address for sending revocation requests.") 41 | 42 | 43 | class RevocationDetails(BaseModel): 44 | revocation_contact: Optional[RevocationContact] = Field( 45 | default=None, description="Details on how the authorization can be revoked." 46 | ) 47 | 48 | 49 | class LegalRepresentative(BaseModel): 50 | name: Optional[str] = Field(default=None, description="Name of the legal representative.") 51 | signature: Optional[str] = Field(default=None, description="Signature of the legal representative.") 52 | authority_description: Optional[str] = Field( 53 | default=None, description="Description of the legal authority under which they are signing." 54 | ) 55 | 56 | 57 | class Signature(BaseModel): 58 | signed_by: Optional[str] = Field(default=None, description="Name of the individual signing the form.") 59 | is_signed: Optional[bool] = Field(default=None, description="Whether the form has been signed.") 60 | date_signed: Optional[date] = Field(default=None, description="Date the form was signed.") 61 | legal_representative: Optional[LegalRepresentative] = Field( 62 | default=None, description="Details if signed by a legal representative." 63 | ) 64 | 65 | 66 | class HIPAARelease(BaseModel): 67 | """HIPAA Release Form for authorizing disclosure of health information.""" 68 | 69 | patient_name: Optional[str] = Field( 70 | default=None, description="Full name of the individual authorizing the release." 
71 | ) 72 | authorized_entity: Optional[str] = Field( 73 | default=None, description="Name of the entity or individual authorized to share information." 74 | ) 75 | health_information: Optional[HealthInformation] = Field( 76 | default=None, description="Details of the health records to be disclosed." 77 | ) 78 | reason_for_disclosure: Optional[str] = Field(default=None, description="Reason for sharing the health information.") 79 | recipient: Optional[Recipient] = Field( 80 | default=None, description="Details of the recipient authorized to receive health information." 81 | ) 82 | authorization_duration: Optional[AuthorizationDuration] = Field( 83 | default=None, description="Duration of authorization for information disclosure." 84 | ) 85 | revocation_details: Optional[RevocationDetails] = Field( 86 | default=None, description="Details on how the authorization can be revoked." 87 | ) 88 | signature: Optional[Signature] = Field(default=None, description="Signature and authorization details.") 89 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/accounting/w2_form.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class Address(BaseModel): 7 | street: Optional[str] = Field(None, description="Street address") 8 | city: Optional[str] = Field(None, description="City") 9 | state: Optional[str] = Field(None, description="State") 10 | zip_code: Optional[str] = Field(None, description="ZIP code") 11 | 12 | 13 | class W2Form(BaseModel): 14 | """W2 Form schema for extracting information from IRS Form W-2 (Wage and Tax Statement).""" 15 | 16 | ssn: Optional[str] = Field(None, description="Employee's Social Security Number (SSN) (Box a)") 17 | ein: Optional[str] = Field(None, description="Employer Identification Number (EIN) (Box b)") 18 | 19 | employer_name: Optional[str] = Field(None, 
description="Full name of the employer (Box c)") 20 | employer_address: Optional[Address] = Field(None, description="Employer's complete address (Box c)") 21 | 22 | control_number: Optional[str] = Field(None, description="Control number assigned to the W2 form (Box d)") 23 | 24 | employee_name: Optional[str] = Field(None, description="Full name of the employee (Box e)") 25 | employee_address: Optional[Address] = Field(None, description="Employee's complete address (Box e)") 26 | 27 | wages_tips_other_compensation: Optional[float] = Field( 28 | None, description="Wages, tips, and other compensation (Box 1)" 29 | ) 30 | federal_income_tax_withheld: Optional[float] = Field(None, description="Federal income tax withheld (Box 2)") 31 | social_security_wages: Optional[float] = Field(None, description="Social security wages (Box 3)") 32 | social_security_tax_withheld: Optional[float] = Field(None, description="Social security tax withheld (Box 4)") 33 | medicare_wages_and_tips: Optional[float] = Field(None, description="Medicare wages and tips (Box 5)") 34 | medicare_tax_withheld: Optional[float] = Field(None, description="Medicare tax withheld (Box 6)") 35 | social_security_tips: Optional[float] = Field(None, description="Social security tips (Box 7)") 36 | allocated_tips: Optional[float] = Field(None, description="Allocated tips (Box 8)") 37 | dependent_care_benefits: Optional[float] = Field(None, description="Dependent care benefits (Box 10)") 38 | nonqualified_plans: Optional[float] = Field(None, description="Nonqualified plans (Box 11)") 39 | total_wages: Optional[float] = Field(None, description="Total wages (Box 12)") 40 | 41 | statutory_employee: Optional[bool] = Field(None, description="Statutory employee checkbox value (Box 13)") 42 | retirement_plan: Optional[bool] = Field(None, description="Retirement plan checkbox value (Box 13)") 43 | third_party_sick_pay: Optional[bool] = Field(None, description="Third party sick pay checkbox value (Box 13)") 44 | 
other_wages: Optional[bool] = Field(None, description="Other wages checkbox value (Box 14)") 45 | 46 | employers_state_id_number: Optional[str] = Field(None, description="Employer's state ID number (Box 15)") 47 | state_wages: Optional[float] = Field(None, description="State wages (Box 16)") 48 | state_income_tax_withheld: Optional[float] = Field(None, description="State income tax withheld (Box 17)") 49 | local_wages: Optional[float] = Field(None, description="Local wages (Box 18)") 50 | local_income_tax_withheld: Optional[float] = Field(None, description="Local income tax withheld (Box 19)") 51 | locality_name: Optional[str] = Field(None, description="Locality name (Box 20)") 52 | 53 | form_year: Optional[int] = Field( 54 | None, description="Tax year for which the W2 form is issued, usually on the bottom in bold." 55 | ) 56 | 57 | a_code: Optional[str] = Field(None, description="Code entered on the left side of Box 12a (Box 12a)") 58 | a_value: Optional[float] = Field(None, description="Value entered on the right side of Box 12a (Box 12a)") 59 | b_code: Optional[str] = Field(None, description="Code entered on the left side of Box 12b (Box 12b)") 60 | b_value: Optional[float] = Field(None, description="Value entered on the right side of Box 12b (Box 12b)") 61 | c_code: Optional[str] = Field(None, description="Code entered on the left side of Box 12c (Box 12c)") 62 | c_value: Optional[float] = Field(None, description="Value entered on the right side of Box 12c (Box 12c)") 63 | d_code: Optional[str] = Field(None, description="Code entered on the left side of Box 12d (Box 12d)") 64 | d_value: Optional[float] = Field(None, description="Value entered on the right side of Box 12d (Box 12d)") 65 | -------------------------------------------------------------------------------- /tests/test_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Optional, Type 3 | 4 | import pytest 5 | from 
loguru import logger 6 | from pydantic import BaseModel 7 | 8 | pytestmark = pytest.mark.skipif( 9 | not os.getenv("OPENAI_API_KEY", False), reason="This test requires OPENAI_API_KEY to be set" 10 | ) 11 | 12 | 13 | @pytest.fixture 14 | def openai_client(): 15 | from openai import OpenAI 16 | 17 | return OpenAI() 18 | 19 | 20 | def test_openai_structured_outputs_simple(openai_client): 21 | from pydantic import Field 22 | 23 | from vlmrun.common.image import encode_image 24 | from vlmrun.common.utils import remote_image 25 | 26 | invoice_url = "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg" 27 | invoice_image = remote_image(invoice_url) 28 | 29 | class Address(BaseModel): 30 | street: Optional[str] = Field(None, description="Street address") 31 | city: Optional[str] = Field(None, description="City") 32 | state: Optional[str] = Field(None, description="State") 33 | postal_code: Optional[str] = Field(None, description="Postal code") 34 | country: Optional[str] = Field(None, description="Country") 35 | 36 | class Item(BaseModel): 37 | description: Optional[str] = Field(None, description="Description or name of the item") 38 | quantity: Optional[int] = Field(None, description="Quantity of the item") 39 | currency: Optional[str] = Field(None, description="3-digit currency code") 40 | unit_price: Optional[float] = Field(None, description="Unit price of the item") 41 | total_price: Optional[float] = Field(None, description="Total price of the item") 42 | 43 | class Invoice(BaseModel): 44 | invoice_id: Optional[str] = Field(None, description="Unique invoice identifier") 45 | invoice_issue_date: Optional[str] = Field(None, description="Issue date of the invoice") 46 | 47 | customer_billing_address: Optional[Address] = Field(None, description="Recipient's billing address") 48 | customer_shipping_address: Optional[Address] = Field(None, description="Recipient's shipping address") 49 | 50 | items: Optional[List[Item]] = 
Field(None, description="Items in the invoice") 51 | subtotal: Optional[float] = Field(None, description="Subtotal of the invoice") 52 | tax: Optional[float] = Field(None, description="Tax of the invoice") 53 | total: Optional[float] = Field(None, description="Total of the invoice") 54 | currency: Optional[str] = Field(None, description="Currency of the invoice") 55 | 56 | response = openai_client.beta.chat.completions.parse( 57 | model="gpt-4o-mini", 58 | messages=[ 59 | { 60 | "role": "user", 61 | "content": [ 62 | {"type": "text", "text": "Extract the invoice in JSON."}, 63 | *[ 64 | {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}} 65 | for img in [invoice_image] 66 | ], 67 | ], 68 | }, 69 | ], 70 | response_format=Invoice, 71 | temperature=0, 72 | ) 73 | logger.info(response.choices[0].message.parsed.model_dump_json(indent=2)) 74 | 75 | 76 | @pytest.mark.benchmark 77 | @pytest.mark.skip(reason="This test is not working due to the patch_response_format function") 78 | def test_openai_structured_outputs_hub_dataset(openai_client): 79 | from vlmrun.common.image import encode_image 80 | from vlmrun.hub.dataset import VLMRUN_HUB_DATASET 81 | from vlmrun.hub.utils import patch_response_format 82 | 83 | for sample in VLMRUN_HUB_DATASET.values(): 84 | response_model: Type[BaseModel] = sample.response_model 85 | response = openai_client.beta.chat.completions.parse( 86 | model="gpt-4o-mini", 87 | messages=[ 88 | { 89 | "role": "user", 90 | "content": [ 91 | {"type": "text", "text": sample.prompt}, 92 | *[ 93 | {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}} 94 | for img in [ 95 | sample.image, 96 | ] 97 | ], 98 | ], 99 | }, 100 | ], 101 | response_format=patch_response_format(response_model), 102 | temperature=0, 103 | ) 104 | logger.info(response.model_dump_json(indent=2)) 105 | -------------------------------------------------------------------------------- 
/tests/benchmarks/2025-02-20-bsahane-Qwen2.5-VL-7B-Instruct-Q4_K_M_benxh-instructor-results.md: -------------------------------------------------------------------------------- 1 | ## Benchmark Results (model=bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh, date=2025-02-20) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 |
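<table>
<tr><th>Domain</th><th>Response Model</th><th>Sample</th><th>Response JSON</th></tr>
<tr><td>document.bank-statement</td><td>BankStatement</td><td></td><td><pre>{
  "account_number": null,
  "account_type": null,
  "bank_address": null,
  "bank_name": null,
  "client_address": null,
  "client_name": null,
  "ending_balance": null,
  "starting_balance": null,
  "statement_date": null,
  "statement_start_date": null,
  "statement_end_date": null,
  "table_item": [],
  "others": null
}</pre></td></tr>
<tr><td>document.invoice</td><td>Invoice</td><td></td><td></td></tr>
<tr><td>document.receipt</td><td>Receipt</td><td></td><td></td></tr>
<tr><td>document.resume</td><td>Resume</td><td></td><td></td></tr>
<tr><td>document.us-drivers-license</td><td>USDriversLicense</td><td></td><td></td></tr>
<tr><td>document.utility-bill</td><td>UtilityBill</td><td></td><td></td></tr>
<tr><td>document.w2-form</td><td>W2Form</td><td></td><td></td></tr>
<tr><td>aerospace.remote-sensing</td><td>RemoteSensing</td><td></td><td><pre>{
  "description": "The satellite image shows an airport with visible runways and surrounding infrastructure, indicating a clear view from space.",
  "objects": [
    "runway"
  ],
  "categories": [
    "airport"
  ],
  "is_visible": true
}</pre></td></tr>
<tr><td>healthcare.medical-insurance-card</td><td>MedicalInsuranceCard</td><td></td><td></td></tr>
<tr><td>retail.ecommerce-product-caption</td><td>RetailEcommerceProductCaption</td><td></td><td><pre>{
  "description": "A sleek, modern e-reader with a vibrant display and intuitive interface.",
  "rating": 85,
  "name": "E-Reader Pro",
  "brand": "TechGenius",
  "category": "Electronics / E-readers",
  "price": "$299.99",
  "color": "Black"
}</pre></td></tr>
<tr><td>media.tv-news</td><td>TVNews</td><td></td><td><pre>{
  "description": "[img-0]",
  "chyron": null,
  "network": null,
  "reporters": []
}</pre></td></tr>
<tr><td>document.us-passport</td><td>USPassport</td><td></td><td></td></tr>
<tr><td>media.nfl-game-state</td><td>NFLGameState</td><td></td><td></td></tr>
<tr><td>media.nba-game-state</td><td>NBAGameState</td><td></td><td></td></tr>
</table>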
# File: vlmrun/hub/schemas/contrib/logistics/bill_of_lading.py
# Pydantic models describing the fields of a Bill of Lading.
# Every field is Optional and defaults to None, so a model instance validates
# even when a value is absent.
from datetime import date
from typing import List, Optional

from pydantic import BaseModel, Field


# Postal address; used by Party.address.
class Address(BaseModel):
    street: Optional[str] = Field(None, description="Street address")
    city: Optional[str] = Field(None, description="City")
    state: Optional[str] = Field(None, description="State or province")
    postal_code: Optional[str] = Field(None, description="Postal code or ZIP code")
    country: Optional[str] = Field(None, description="Country")


# Contact channels; used by Party.contact.
class Contact(BaseModel):
    phone: Optional[str] = Field(None, description="Phone number")
    email: Optional[str] = Field(None, description="Email address")
    fax: Optional[str] = Field(None, description="Fax number")


# A named party; BillOfLading uses it for shipper, consignee, notify party and forwarding agent.
class Party(BaseModel):
    name: Optional[str] = Field(None, description="Name of the party")
    address: Optional[Address] = Field(None, description="Address of the party")
    contact: Optional[Contact] = Field(None, description="Contact information (phone, email, etc.)")
    reference: Optional[str] = Field(None, description="Reference number or identifier")


# One shipping container; BillOfLading.containers holds a list of these.
class Container(BaseModel):
    number: Optional[str] = Field(None, description="Container number")
    seal_number: Optional[str] = Field(None, description="Seal number")
    type: Optional[str] = Field(None, description="Container type")
    # NOTE(review): no unit field accompanies the container weight (Goods has weight_unit).
    weight: Optional[float] = Field(None, description="Weight of the container")
    measurement: Optional[str] = Field(None, description="Measurement or dimensions of the container")


# Cargo description; numeric weight/volume are paired with separate unit fields.
class Goods(BaseModel):
    description: Optional[str] = Field(None, description="Description of the goods")
    packages: Optional[int] = Field(None, description="Number of packages")
    package_type: Optional[str] = Field(None, description="Type of packages (cartons, pallets, etc.)")
    weight: Optional[float] = Field(None, description="Weight of the goods")
    weight_unit: Optional[str] = Field(None, description="Unit of weight measurement (kg, lb, etc.)")
    volume: Optional[float] = Field(None, description="Volume of the goods")
    volume_unit: Optional[str] = Field(None, description="Unit of volume measurement (cbm, cft, etc.)")
    marks_and_numbers: Optional[str] = Field(None, description="Marks and numbers on the packages")
    dangerous_goods_info: Optional[str] = Field(None, description="Information about dangerous goods, if applicable")


# Freight terms and charges.
class FreightDetails(BaseModel):
    freight_terms: Optional[str] = Field(None, description="Terms of freight (prepaid, collect, etc.)")
    freight_charges: Optional[float] = Field(None, description="Freight charges amount")
    currency: Optional[str] = Field(None, description="Currency of the freight charges")
    # Untyped dicts: the keys of each additional charge are not constrained by this schema.
    additional_charges: Optional[List[dict]] = Field(None, description="Additional charges or fees")
    payment_method: Optional[str] = Field(None, description="Method of payment")


# Top-level model: composes the parties, routing, cargo, freight and signature fields.
class BillOfLading(BaseModel):
    bill_number: Optional[str] = Field(None, description="Bill of Lading number")
    booking_number: Optional[str] = Field(None, description="Booking or reference number")
    issue_date: Optional[date] = Field(None, description="Date of issue of the Bill of Lading")

    shipper: Optional[Party] = Field(None, description="Shipper or exporter information")
    consignee: Optional[Party] = Field(None, description="Consignee or importer information")
    notify_party: Optional[Party] = Field(None, description="Notify party information")
    forwarding_agent: Optional[Party] = Field(None, description="Forwarding agent information")

    vessel_name: Optional[str] = Field(None, description="Name of the vessel")
    voyage_number: Optional[str] = Field(None, description="Voyage number")
    carrier: Optional[str] = Field(None, description="Carrier or shipping line")

    port_of_loading: Optional[str] = Field(None, description="Port of loading")
    port_of_discharge: Optional[str] = Field(None, description="Port of discharge")
    place_of_receipt: Optional[str] = Field(None, description="Place of receipt")
    place_of_delivery: Optional[str] = Field(None, description="Place of delivery")

    containers: Optional[List[Container]] = Field(None, description="List of containers")
    goods: Optional[Goods] = Field(None, description="Details of the goods being shipped")

    freight_details: Optional[FreightDetails] = Field(None, description="Freight and payment details")

    special_instructions: Optional[str] = Field(None, description="Special instructions or remarks")

    number_of_original_bills: Optional[int] = Field(None, description="Number of original Bills of Lading issued")

    signature_place: Optional[str] = Field(None, description="Place of signature")
    signature_date: Optional[date] = Field(None, description="Date of signature")
    signatory: Optional[str] = Field(None, description="Name or title of the signatory")
# File: tests/test_registry.py
# Tests for vlmrun.hub.registry: Registry lookup/listing, catalog YAML loading,
# and the errors raised for missing, malformed or unimportable catalogs.
from pathlib import Path

import pytest
from pydantic import BaseModel
from ruamel.yaml.parser import ParserError

from vlmrun.hub.registry import Registry, SchemaCatalogItem, SchemaCatalogYaml


@pytest.fixture
def registry():
    """Create a fresh registry instance for each test"""
    return Registry()


def test_registry_singleton():
    """Test that Registry behaves as a singleton"""
    # Importing the module-level `registry` twice must yield the same object.
    from vlmrun.hub.registry import registry as registry1
    from vlmrun.hub.registry import registry as registry2

    assert registry1 is registry2
    assert isinstance(registry1, Registry)


def test_registry_load_schemas(registry):
    """Test loading schemas from catalog"""
    registry.load_schemas()
    assert len(registry.schemas) > 0

    assert "document.receipt" in registry.schemas
    assert "document.resume" in registry.schemas
    assert "document.us-drivers-license" in registry.schemas


def test_registry_getitem(registry):
    """Test accessing schemas using dictionary syntax"""
    schema = registry["document.receipt"]
    assert issubclass(schema, BaseModel)

    with pytest.raises(KeyError):
        _ = registry["non.existent.schema"]


def test_registry_repr(registry):
    """Test string representation of registry"""
    repr_str = repr(registry)
    assert "Registry [schemas=" in repr_str
    assert "document.receipt" in repr_str
    assert "document.resume" in repr_str


def test_registry_list_schemas(registry):
    """Test listing available schemas"""
    schemas = registry.list_schemas()
    assert isinstance(schemas, list)
    assert len(schemas) > 0
    assert "document.receipt" in schemas
    assert "document.resume" in schemas


def test_schema_catalog_item_validation():
    """Test SchemaCatalogItem validation"""
    # The item is built with the `schema` keyword but read back as `schema_path`.
    item = SchemaCatalogItem(
        domain="test.domain",
        schema="vlmrun.hub.schemas.document.Receipt",
        prompt="Test prompt",
        description="Test description that is sufficiently detailed",
        supported_inputs=["document"],
        tags=["test"],
    )
    assert item.domain == "test.domain"
    assert item.schema_path == "vlmrun.hub.schemas.document.Receipt"
    assert len(item.prompt) >= 10
    assert len(item.description) >= 20


def test_schema_catalog_yaml_loading():
    """Test loading catalog from YAML"""
    # Resolved relative to this test file: <repo>/vlmrun/hub/catalog.yaml
    catalog_path = Path(__file__).parent.parent / "vlmrun" / "hub" / "catalog.yaml"
    catalog = SchemaCatalogYaml.from_yaml(catalog_path)

    assert catalog.apiVersion == "v1"
    assert isinstance(catalog.schemas, list)
    assert len(catalog.schemas) > 0

    # Referenced sub-catalogs are optional; when present each must exist on disk.
    if catalog.catalogs:
        assert isinstance(catalog.catalogs, list)
        for ref in catalog.catalogs:
            ref_path = Path(__file__).parent.parent / "vlmrun" / "hub" / ref
            assert ref_path.exists()


def test_ensure_schemas_loaded_decorator(registry):
    """Test the ensure_schemas_loaded decorator"""
    # No explicit load_schemas() call: item access alone must leave schemas populated.
    schema = registry["document.receipt"]
    assert schema is not None

    assert len(registry.schemas) > 0


def test_registry_load_schemas_with_invalid_path(registry):
    """Test loading schemas from non-existent path raises FileNotFoundError"""
    with pytest.raises(FileNotFoundError, match="Catalog file not found"):
        registry.load_schemas(catalog_paths=("nonexistent.yaml",))


def test_registry_load_schemas_with_invalid_yaml(registry, tmp_path):
    """Test loading schemas with invalid YAML content raises ruamel's ParserError"""
    invalid_yaml = tmp_path / "invalid.yaml"
    invalid_yaml.write_text("invalid: [\nyaml: content")

    with pytest.raises(ParserError):
        registry.load_schemas(catalog_paths=(invalid_yaml,))


def test_registry_load_schemas_with_invalid_schema(registry, tmp_path):
    """Test loading schemas with invalid schema definition raises ValueError"""
    # NOTE(review): the indentation inside this YAML literal was lost in the flattened
    # dump this block was restored from; it is written here as conventional nested
    # YAML -- confirm against the repository copy.
    invalid_schema_yaml = """
apiVersion: v1
schemas:
  - domain: test.invalid
    schema: nonexistent.module.Schema
    prompt: Test prompt
    description: Test description that is sufficiently detailed
    metadata:
      supported_inputs: ["document"]
      tags: ["test"]
"""
    test_yaml = tmp_path / "test.yaml"
    test_yaml.write_text(invalid_schema_yaml)

    with pytest.raises(ValueError, match="Unable to import nonexistent.module.Schema"):
        registry.load_schemas(catalog_paths=(test_yaml,))


def test_registry_detailed_key_error(registry):
    """Test that KeyError includes available schemas in message"""
    with pytest.raises(KeyError) as exc_info:
        _ = registry["non.existent.schema"]

    error_msg = str(exc_info.value)
    assert "Available schemas:" in error_msg
    assert "document.receipt" in error_msg
    assert "document.resume" in error_msg
# File: vlmrun/hub/schemas/document/bank_statement.py
# Pydantic models describing the fields of a bank statement.
# Every field is Optional and defaults to None.
from datetime import date
from typing import Dict, List, Optional

from pydantic import BaseModel, Field


# Postal address; used for both the bank and the account holder.
class Address(BaseModel):
    street: Optional[str] = Field(None, description="Street address")
    city: Optional[str] = Field(None, description="City")
    state: Optional[str] = Field(None, description="State/Province code or name")
    zip_code: Optional[str] = Field(None, description="Postal code")


# One transaction line. Deposits and withdrawals have separate amount/date/description
# triples; per the field descriptions only the triple matching the transaction kind is filled.
class BankTransaction(BaseModel):
    # Original fields - descriptions updated for clarity
    transaction_deposit: Optional[float] = Field(None, description="Deposit amount, if this transaction is a deposit.")
    transaction_deposit_date: Optional[date] = Field(None, description="Date of the deposit, if this transaction is a deposit.")
    transaction_deposit_description: Optional[str] = Field(None, description="Description of the deposit, if this transaction is a deposit.")
    transaction_withdrawal: Optional[float] = Field(None, description="Withdrawal amount, if this transaction is a withdrawal.")
    transaction_withdrawal_date: Optional[date] = Field(None, description="Date of the withdrawal, if this transaction is a withdrawal.")
    transaction_withdrawal_description: Optional[str] = Field(None, description="Description of the withdrawal, if this transaction is a withdrawal.")

    # New fields added based on examples
    check_number: Optional[str] = Field(None, description="Check number associated with the transaction, if applicable (e.g., for check payments or cashed checks).")
    ending_daily_balance: Optional[float] = Field(None, description="The running daily balance of the account after this transaction, if provided on the transaction line.")
    reference_number: Optional[str] = Field(None, description="A specific reference number or transaction ID for this item, if provided in the transaction line (e.g., for deposits, ATM transactions).")


# Top-level model: account identity, statement period, balances, transaction list and summary totals.
class BankStatement(BaseModel):
    # Existing fields - descriptions may be slightly enhanced for clarity
    account_number: Optional[str] = Field(None, description="Bank account number associated with the statement.")
    account_type: Optional[str] = Field(None, description="Type of the bank account (e.g., Checking, Savings, 'CONNECTIONS CHECKING').")
    bank_address: Optional[Address] = Field(None, description="Address of the banking institution.")
    bank_name: Optional[str] = Field(None, description="Name of the banking institution.")
    client_address: Optional[Address] = Field(None, description="Address of the account holder(s).")
    client_name: Optional[str] = Field(None, description="Name of the account holder(s). If multiple, may be a concatenated string (e.g., 'Rachael Dean, Calvin Carrillo').")
    ending_balance: Optional[float] = Field(None, description="The final balance of the account at the end of the statement period.")
    starting_balance: Optional[float] = Field(None, description="The balance of the account at the beginning of the statement period.")
    statement_date: Optional[date] = Field(None, description="The date the statement was issued or generated.")
    statement_start_date: Optional[date] = Field(None, description="The first day of the period covered by this statement.")
    statement_end_date: Optional[date] = Field(None, description="The last day of the period covered by this statement.")
    # The transaction rows live under the name `table_item`.
    table_item: Optional[List[BankTransaction]] = Field(None, description="A list of individual financial transactions (deposits, withdrawals, checks, etc.) detailed in the statement.")

    # New fields added based on examples
    routing_number: Optional[str] = Field(None, description="Bank's routing transit number (RTN), if provided on the statement.")
    total_deposits: Optional[float] = Field(None, description="Summary total of all deposits and other credits for the statement period, as per the statement's summary section.")
    total_withdrawals: Optional[float] = Field(None, description="Summary total of all withdrawals, payments, and other debits for the statement period, as per the statement's summary section.")

    # More specific summary totals if available
    summary_total_atm_withdrawals: Optional[float] = Field(None, description="Total amount of ATM withdrawals as reported in a summary section of the statement, if available.")
    summary_total_debit_card_purchases: Optional[float] = Field(None, description="Total amount of debit card purchases (e.g., VISA Check Card) as reported in a summary section, if available.")
    summary_total_checks_paid: Optional[float] = Field(None, description="Total amount of checks paid as reported in a summary section, if available (this may differ from a sum of individual check transactions if the summary is specific).")

    monthly_service_fee: Optional[float] = Field(None, description="Amount of the monthly service fee charged during the statement period, if any.")
    overdraft_protection_status: Optional[str] = Field(None, description="Textual description of the overdraft protection status or related services on the account, if mentioned.")

    # Existing 'others' field for flexibility
    others: Optional[Dict] = Field(None, description="A dictionary for any other relevant data extracted from the statement that does not fit into the predefined fields.")
# File: tests/test_instructor.py
# Runs the hub dataset samples through instructor-wrapped, OpenAI-compatible clients.
import os
from typing import Literal

import pytest
from conftest import BenchmarkResult, create_benchmark
from dotenv import load_dotenv
from loguru import logger

from vlmrun.common.image import encode_image
from vlmrun.hub.dataset import VLMRUN_HUB_DATASET, HubSample

# Load API keys from a local .env file, if any, before they are read below.
load_dotenv()


def get_instructor_client(provider: Literal["openai", "gemini", "fireworks", "ollama"] = "openai"):
    """Build an instructor client on top of an OpenAI-compatible endpoint.

    Args:
        provider: Which endpoint to target. "openai", "gemini" and "fireworks" read
            their key from OPENAI_API_KEY / GEMINI_API_KEY / FIREWORKS_API_KEY;
            "ollama" uses a fixed local URL and a placeholder key.

    Returns:
        The result of ``instructor.from_openai`` in ``MD_JSON`` mode.

    Raises:
        ValueError: If the provider is unknown or its API key variable is unset/empty.
        Any exception raised by ``client.models.list()`` for "ollama".
    """
    # Imported lazily so that importing this module does not require these packages.
    import instructor
    from openai import OpenAI

    client = None
    if provider == "openai":
        api_key = os.getenv("OPENAI_API_KEY", None)
        if not api_key:
            raise ValueError("OPENAI_API_KEY is not set")
        client = OpenAI(
            api_key=api_key,
            base_url="https://api.openai.com/v1",
        )
    elif provider == "gemini":
        api_key = os.getenv("GEMINI_API_KEY", None)
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set")
        client = OpenAI(
            api_key=api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
        )
    elif provider == "fireworks":
        api_key = os.getenv("FIREWORKS_API_KEY", None)
        if not api_key:
            raise ValueError("FIREWORKS_API_KEY is not set")
        client = OpenAI(
            api_key=api_key,
            base_url="https://api.fireworks.ai/inference/v1",
        )
    elif provider == "ollama":
        client = OpenAI(
            api_key="ollama",
            base_url="http://localhost:11434/v1/",
        )
        client.models.list()  # check if ollama is running, otherwise raise an error
    else:
        raise ValueError(f"Invalid provider: {provider}")

    return instructor.from_openai(
        client,
        mode=instructor.Mode.MD_JSON,
    )


def process_sample(client, sample: HubSample, model: str):
    """Send one hub sample to ``model`` and return the structured response.

    The single user message carries ``sample.prompt`` followed by every image in
    ``sample.images`` encoded through ``encode_image(..., format="JPEG")``. The
    call uses ``sample.response_model``, temperature 0 and no retries.
    """
    return client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": sample.prompt},
                    *[
                        {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}}
                        for img in sample.images
                    ],
                ],
            },
        ],
        response_model=sample.response_model,
        temperature=0,
        max_retries=0,
    )


# (provider, model) pairs used to parametrize the benchmark test; only the first is enabled.
PROVIDER_MODELS = [
    ("openai", "gpt-4o-mini-2024-07-18"),
    # ("openai", "gpt-4o-2024-08-06"),
    # ("openai", "gpt-4o-2024-11-20"),
    # ("openai", "o1-2024-12-17"),
    # ("openai", "o1-mini-2024-09-12"),
    # ("openai", "o3-mini-2025-01-31"),
    # ("gemini", "gemini-2.0-flash-exp"),
    # ("fireworks", "accounts/fireworks/models/llama-v3p2-11b-vision-instruct"),
    # ("ollama", "llama3.2-vision:11b"),
    # ("ollama", "bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh"),
]


def test_instructor_hub_sample(provider_arg: str, model_arg: str, domain_arg: str):
    """Run a single dataset domain through one provider/model and print the response.

    NOTE(review): the three arguments are presumably pytest fixtures supplied by
    conftest.py (e.g. from command-line options) -- confirm there.
    """
    # Shadows the builtin print inside this function only.
    from rich import print

    provider, model, domain = provider_arg, model_arg, domain_arg

    # Get the client (based on provider)
    try:
        instructor_client = get_instructor_client(provider)
    except Exception as e:
        # Any failure to build the client (missing key, server down, ...) skips the test.
        pytest.skip(f"Error getting instructor client: {e}")

    logger.debug(f"Testing provider={provider}, model={model}, domain={domain}")
    sample = VLMRUN_HUB_DATASET[domain]
    logger.debug(f"Testing domain={sample.domain}, sample={sample}")
    logger.debug(f"sample.images={sample.images}")
    response = process_sample(instructor_client, sample, model=model)
    print(response.model_dump_json(indent=2))
    assert response is not None


@pytest.mark.benchmark
@pytest.mark.parametrize("provider_model", PROVIDER_MODELS)
def test_instructor_hub_dataset(provider_model: tuple[str, str]):
    """Run every dataset sample through one provider/model and write a benchmark.

    A sample that raises is logged and recorded with ``response_json=None``; it
    does not fail the test.
    """
    provider, model = provider_model

    # Get the client (based on provider)
    try:
        instructor_client = get_instructor_client(provider)
    except Exception as e:
        pytest.skip(f"Error getting instructor client: {e}")

    # Process all samples
    results = []
    for sample in VLMRUN_HUB_DATASET.values():
        logger.debug(f"Testing domain={sample.domain}, sample={sample}")
        logger.debug(f"sample.images={sample.images}")

        # Try to process the sample
        try:
            response = process_sample(instructor_client, sample, model=model)
        except Exception as e:
            response = None
            logger.error(f"Error processing sample {sample.domain}: {e}")

        results.append(
            BenchmarkResult(
                domain=sample.domain,
                sample=sample.data,
                response_model=sample.response_model.__name__,
                response_json=response.model_dump_json(indent=2, exclude_none=False) if response else None,
            )
        )
        if response:
            logger.debug(response.model_dump_json(indent=2))

    create_benchmark(results, model, suffix="instructor")
pathlib import Path 6 | from tempfile import TemporaryDirectory 7 | from typing import Any, Dict, List, Tuple, Type, Union, get_args, get_origin 8 | 9 | from datamodel_code_generator import DataModelType, InputFileType, generate 10 | from pydantic import BaseModel, create_model 11 | from typing_extensions import TypeAlias 12 | 13 | ResponseFormat: TypeAlias = Type[BaseModel] 14 | AnnotationType: TypeAlias = Union[Type, Any] 15 | 16 | # Regex patterns that use look-around constructs unsupported by pydantic-core's Rust regex engine 17 | UNSUPPORTED_LOOKAROUND_TOKENS = ("(?=", "(?!", "(?<=", "(? Any: 21 | """Recursively strip regex patterns with unsupported look-around constructs from JSON schema.""" 22 | if isinstance(node, dict): 23 | pattern = node.get("pattern") 24 | if isinstance(pattern, str) and any(tok in pattern for tok in UNSUPPORTED_LOOKAROUND_TOKENS): 25 | node = dict(node) # copy so we don't mutate callers' dicts 26 | node.pop("pattern", None) 27 | for key, value in list(node.items()): 28 | node[key] = _strip_unsupported_patterns(value) 29 | return node 30 | elif isinstance(node, list): 31 | return [_strip_unsupported_patterns(item) for item in node] 32 | return node 33 | 34 | 35 | def sanitize_json_schema_for_pydantic_core(schema: Dict[str, Any]) -> Dict[str, Any]: 36 | """Sanitize JSON schema by removing regex patterns unsupported by pydantic-core. 37 | 38 | pydantic-core uses a Rust regex engine that doesn't support look-around constructs 39 | (look-ahead and look-behind). This function removes such patterns to allow model 40 | generation to succeed. 41 | """ 42 | return _strip_unsupported_patterns(schema) 43 | 44 | 45 | def patch_response_format(response_format: ResponseFormat) -> ResponseFormat: 46 | """Patch the OpenAI response format to handle Pydantic models, including nested models. 
# File: vlmrun/hub/utils.py (third-party imports, type aliases and model helpers)
from datamodel_code_generator import DataModelType, InputFileType, generate
from pydantic import BaseModel, create_model
from typing_extensions import TypeAlias

ResponseFormat: TypeAlias = Type[BaseModel]
AnnotationType: TypeAlias = Union[Type, Any]


def patch_response_format(response_format: ResponseFormat) -> ResponseFormat:
    """Patch the OpenAI response format to handle Pydantic models, including nested models.

    The following fields are not supported by OpenAI:
    - date
    - datetime
    - time
    - timedelta

    This function replaces those annotations with ``str``, recursing through
    ``Union``/``Optional`` members, list element types and nested models.
    Converting the strings back to the original type is not done here.

    Args:
        response_format: The Pydantic model class to patch.

    Returns:
        A new model class named ``<original name>_patched`` with the same fields
        and patched annotations. The input class is not modified.
    """
    from datetime import date, datetime, time, timedelta

    def patch_pydantic_field_annotation(annotation: AnnotationType) -> AnnotationType:
        if annotation in [date, datetime, time, timedelta]:
            return str
        origin = get_origin(annotation)
        if origin is Union:
            return Union[tuple([patch_pydantic_field_annotation(a) for a in get_args(annotation)])]
        # get_origin(List[X]) returns the builtin `list`, never `typing.List`, so the
        # comparison has to be against `list`; `List` is kept for safety.
        if origin in (list, List):
            args = get_args(annotation)
            if not args:  # bare List / list without an element type: nothing to patch
                return annotation
            return List[patch_pydantic_field_annotation(args[0])]
        if isinstance(annotation, type) and issubclass(annotation, BaseModel):
            return patch_pydantic_model(annotation)
        return annotation

    def patch_pydantic_model(model: Type[BaseModel]) -> Type[BaseModel]:
        # Copy the fields from the base class
        fields = model.model_fields.copy()
        # Each entry is (patched annotation, original FieldInfo) so defaults and
        # descriptions carry over to the new model.
        new_fields: Dict[str, Tuple[AnnotationType, Any]] = {
            field_name: (patch_pydantic_field_annotation(field.annotation), field)
            for field_name, field in fields.items()
        }
        # Create a new model with the patched fields
        return create_model(f"{model.__name__}_patched", __base__=BaseModel, **new_fields)

    return patch_pydantic_model(response_format)


def jsonschema_to_model(schema: Dict) -> Type[BaseModel]:
    """Generate a Pydantic Model from a json schema.

    Args:
        schema: Source json schema to create Pydantic model from

    Returns:
        The newly created and loaded Pydantic class
    """
    # The generated class is named after the schema title, "Model" when absent.
    class_name = schema.get("title", "Model")
    # Sanitize the schema to remove regex patterns unsupported by pydantic-core
    sanitized_schema = sanitize_json_schema_for_pydantic_core(schema)
    # Serialized to a string so the cached helper below receives hashable arguments.
    json_schema = json.dumps(sanitized_schema)
    model = jsonschemastr_to_model(json_schema, class_name)
    return model


@lru_cache(maxsize=16)
def jsonschemastr_to_model(json_schema: str, class_name: str) -> Type[BaseModel]:
    """Generate a Pydantic Model from a json schema string.

    Note (spillai): We use this to cache the generated models to avoid recompiling them.

    Args:
        json_schema: Source json schema, serialized as a string
        class_name: Name of the top-level class to generate and return

    Returns:
        The newly created and loaded Pydantic class

    Raises:
        ImportError: If no import spec/loader can be built for the generated file
    """
    # `import importlib` alone does not guarantee the `util` submodule is loaded.
    import importlib.util

    # Ref: https://github.com/koxudaxi/datamodel-code-generator/issues/278
    with TemporaryDirectory() as tmp_dirname:
        tmp_dir = Path(tmp_dirname)
        tmp_path = Path(tmp_dir / "tempmodel.py")
        generate(
            json_schema,
            input_file_type=InputFileType.JsonSchema,
            class_name=class_name,
            output=tmp_path,
            output_model_type=DataModelType.PydanticV2BaseModel,
        )
        spec = importlib.util.spec_from_file_location("models", str(tmp_path))
        if spec and spec.loader:
            module = importlib.util.module_from_spec(spec)
            # NOTE(review): every generated module is registered under the same name
            # ("models"), so each call replaces the previous sys.modules entry.
            sys.modules[spec.name] = module
            spec.loader.exec_module(module)
            return getattr(module, class_name)
    raise ImportError("Failed to import generated model")  # pragma: no cover
import BaseModel, Field 4 | 5 | 6 | class HeaderInformation(BaseModel): 7 | company_name: Optional[str] = Field(None, description="Name of the company") 8 | report_title: Optional[str] = Field(None, description="Title of the report (e.g., 'Consolidated Balance Sheet')") 9 | reporting_date: Optional[date] = Field(None, description="Date of the balance sheet") 10 | reporting_period: Optional[str] = Field(None, description="Period covered by the report (e.g., 'December 31, 2023')") 11 | currency: Optional[str] = Field(None, description="Currency used in the report (e.g., 'USD', 'EUR')") 12 | units: Optional[str] = Field(None, description="Units of measurement (e.g., 'millions', 'thousands')") 13 | accounting_standard: Optional[str] = Field(None, description="Accounting standard used (e.g., 'IFRS', 'GAAP')") 14 | is_consolidated: Optional[bool] = Field(None, description="Whether this is a consolidated statement") 15 | auditor: Optional[str] = Field(None, description="Name of the auditing firm") 16 | audit_opinion: Optional[str] = Field(None, description="Type of audit opinion (e.g., 'Unqualified', 'Qualified')") 17 | 18 | 19 | class CurrentAssets(BaseModel): 20 | cash_and_equivalents: Optional[float] = Field(None, description="Cash and highly liquid assets convertible to cash within 90 days") 21 | marketable_securities: Optional[float] = Field(None, description="Short-term investments that can be readily converted to cash") 22 | accounts_receivable: Optional[float] = Field(None, description="Money owed to the company by customers for goods/services delivered") 23 | inventories: Optional[float] = Field(None, description="Raw materials, work-in-progress, and finished goods held for sale") 24 | prepaid_expenses: Optional[float] = Field(None, description="Expenses paid in advance that haven't yet been incurred") 25 | other_current_assets: Optional[float] = Field(None, description="Any other assets expected to be converted to cash within one year") 26 | 
total_current_assets: Optional[float] = Field(None, description="Sum of all current assets") 27 | 28 | 29 | class NonCurrentAssets(BaseModel): 30 | property_plant_equipment: Optional[float] = Field(None, description="Net value of physical assets like buildings, machinery, and equipment") 31 | intangible_assets: Optional[float] = Field(None, description="Non-physical assets like patents, trademarks, goodwill, and software") 32 | long_term_investments: Optional[float] = Field(None, description="Investments intended to be held for more than one year") 33 | other_non_current_assets: Optional[float] = Field(None, description="Any other assets not expected to be converted to cash within one year") 34 | total_non_current_assets: Optional[float] = Field(None, description="Sum of all non-current assets") 35 | 36 | 37 | class Assets(BaseModel): 38 | current_assets: Optional[CurrentAssets] = Field(None, description="Assets expected to be converted to cash within one year") 39 | non_current_assets: Optional[NonCurrentAssets] = Field(None, description="Assets expected to provide economic benefits beyond one year") 40 | total_assets: Optional[float] = Field(None, description="Sum of all assets (current and non-current)") 41 | 42 | 43 | class CurrentLiabilities(BaseModel): 44 | accounts_payable: Optional[float] = Field(None, description="Money owed to suppliers for goods/services received") 45 | short_term_debt: Optional[float] = Field(None, description="Debt due within one year, including current portion of long-term debt") 46 | accrued_expenses: Optional[float] = Field(None, description="Expenses recognized but not yet paid") 47 | income_taxes_payable: Optional[float] = Field(None, description="Taxes owed but not yet paid") 48 | deposits: Optional[float] = Field(None, description="Customer deposits or other funds held temporarily") 49 | other_current_liabilities: Optional[float] = Field(None, description="Any other obligations due within one year") 50 | 
total_current_liabilities: Optional[float] = Field(None, description="Sum of all current liabilities") 51 | 52 | 53 | class NonCurrentLiabilities(BaseModel): 54 | long_term_debt: Optional[float] = Field(None, description="Debt obligations due beyond one year") 55 | lease_liabilities: Optional[float] = Field(None, description="Long-term lease obligations") 56 | other_non_current_liabilities: Optional[float] = Field(None, description="Any other obligations due beyond one year") 57 | total_non_current_liabilities: Optional[float] = Field(None, description="Sum of all non-current liabilities") 58 | 59 | 60 | class Liabilities(BaseModel): 61 | current_liabilities: Optional[CurrentLiabilities] = Field(None, description="Obligations due within one year") 62 | non_current_liabilities: Optional[NonCurrentLiabilities] = Field(None, description="Obligations due beyond one year") 63 | total_liabilities: Optional[float] = Field(None, description="Sum of all liabilities (current and non-current)") 64 | 65 | 66 | class ShareholdersEquity(BaseModel): 67 | common_stock: Optional[float] = Field(None, description="Par value of issued common shares") 68 | preferred_stock: Optional[float] = Field(None, description="Par value of issued preferred shares") 69 | additional_paid_in_capital: Optional[float] = Field(None, description="Amount paid by shareholders above par value") 70 | retained_earnings: Optional[float] = Field(None, description="Accumulated profits not distributed to shareholders") 71 | total_equity: Optional[float] = Field(None, description="Sum of all shareholders' equity components") 72 | 73 | 74 | class BalanceSheet(BaseModel): 75 | header: Optional[HeaderInformation] = Field(None, description="General information about the company and report") 76 | assets: Optional[Assets] = Field(None, description="Resources owned or controlled by the company") 77 | liabilities: Optional[Liabilities] = Field(None, description="Obligations and debts owed by the company") 78 | equity: 
Optional[ShareholdersEquity] = Field(None, description="Residual interest in the assets after deducting liabilities") 79 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/document/resume.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field, HttpUrl 5 | 6 | 7 | class ContactInfo(BaseModel): 8 | full_name: str = Field(..., description="Full name of the individual.") 9 | email: Optional[str] = Field(None, description="Email address.") 10 | phone: Optional[str] = Field(None, description="Phone number.") 11 | address: Optional[str] = Field(None, description="Physical address.") 12 | linkedin: Optional[HttpUrl] = Field(None, description="LinkedIn profile URL.") 13 | github: Optional[HttpUrl] = Field(None, description="GitHub profile URL.") 14 | portfolio: Optional[HttpUrl] = Field(None, description="Portfolio website URL.") 15 | google_scholar: Optional[HttpUrl] = Field(None, description="Google Scholar profile URL.") 16 | 17 | 18 | class Education(BaseModel): 19 | institution: Optional[str] = Field(None, description="Name of the educational institution.") 20 | degree: Optional[str] = Field(None, description="Degree obtained or pursued.") 21 | field_of_study: Optional[str] = Field(None, description="Field of study or major.") 22 | graduation_date: Optional[date] = Field(None, description="Date of graduation.") 23 | gpa: Optional[float] = Field(None, description="Grade Point Average.") 24 | honors: Optional[List[str]] = Field(None, description="Honors or awards received.") 25 | relevant_courses: Optional[List[str]] = Field(None, description="Relevant courses taken.") 26 | 27 | 28 | class WorkExperience(BaseModel): 29 | company: Optional[str] = Field(None, description="Name of the company.") 30 | position: Optional[str] = Field(None, description="Job title or position 
held.") 31 | start_date: Optional[date] = Field(None, description="Start date of employment.") 32 | end_date: Optional[date] = Field(None, description="End date of employment.") 33 | is_current: bool = Field(False, description="Indicates if this is the current job.") 34 | responsibilities: List[str] = Field(..., description="Key responsibilities and achievements.") 35 | technologies: Optional[List[str]] = Field(None, description="Technologies or tools used.") 36 | 37 | 38 | class Skill(BaseModel): 39 | name: str = Field(..., description="Name of the skill.") 40 | level: Optional[str] = Field(None, description="Proficiency level (e.g., 'Beginner', 'Intermediate', 'Expert')") 41 | years_of_experience: Optional[float] = Field(None, description="Years of experience with this skill.") 42 | 43 | 44 | class TechnicalSkills(BaseModel): 45 | programming_languages: List[Skill] = Field(..., description="Programming languages.") 46 | frameworks_libraries: List[Skill] = Field(..., description="Frameworks and libraries.") 47 | databases: Optional[List[Skill]] = Field(None, description="Database technologies.") 48 | tools: Optional[List[Skill]] = Field(None, description="Development tools and environments.") 49 | cloud_platforms: Optional[List[Skill]] = Field(None, description="Cloud platforms and services.") 50 | other: Optional[List[Skill]] = Field(None, description="Other technical skills.") 51 | 52 | 53 | class Project(BaseModel): 54 | name: str = Field(..., description="Name of the project.") 55 | description: Optional[str] = Field(None, description="Brief description of the project.") 56 | technologies: Optional[List[str]] = Field(None, description="Technologies or tools used.") 57 | url: Optional[HttpUrl] = Field(None, description="URL to the project or its repository.") 58 | github_url: Optional[HttpUrl] = Field(None, description="GitHub repository URL.") 59 | start_date: Optional[date] = Field(None, description="Start date of the project.") 60 | end_date: Optional[date] 
= Field(None, description="End date of the project.") 61 | role: Optional[str] = Field(None, description="Role in the project.") 62 | key_achievements: Optional[List[str]] = Field(None, description="Key achievements or features implemented.") 63 | 64 | 65 | class Certification(BaseModel): 66 | name: str = Field(..., description="Name of the certification.") 67 | issuer: str = Field(..., description="Organization that issued the certification.") 68 | date_obtained: Optional[date] = Field(None, description="Date the certification was obtained.") 69 | expiration_date: Optional[date] = Field(None, description="Expiration date of the certification.") 70 | credential_id: Optional[str] = Field(None, description="Credential ID or verification URL.") 71 | 72 | 73 | class OpenSourceContribution(BaseModel): 74 | project_name: str = Field(..., description="Name of the open-source project.") 75 | contribution_type: str = Field( 76 | ..., description="Type of contribution (e.g., 'Bug fix', 'Feature', 'Documentation')" 77 | ) 78 | description: str = Field(..., description="Brief description of the contribution.") 79 | url: Optional[HttpUrl] = Field(None, description="URL to the contribution (e.g., pull request).") 80 | 81 | 82 | class Resume(BaseModel): 83 | contact_info: ContactInfo = Field(..., description="Contact information of the individual.") 84 | summary: Optional[str] = Field(None, description="Professional summary or objective statement.") 85 | education: List[Education] = Field(..., description="Educational background.") 86 | work_experience: List[WorkExperience] = Field(..., description="Work experience.") 87 | technical_skills: TechnicalSkills = Field(..., description="Technical skills.") 88 | projects: Optional[List[Project]] = Field(None, description="Notable projects") 89 | open_source_contributions: Optional[List[OpenSourceContribution]] = Field( 90 | None, description="Open source contributions." 
91 | ) 92 | certifications: Optional[List[Certification]] = Field(None, description="Professional certifications.") 93 | publications: Optional[List[str]] = Field(None, description="Publications or technical writing.") 94 | conferences: Optional[List[str]] = Field(None, description="Conferences attended or presented at.") 95 | languages: Optional[List[Skill]] = Field(None, description="Languages known (natural languages).") 96 | volunteer_work: Optional[List[str]] = Field(None, description="Volunteer work or community service.") 97 | interests: Optional[List[str]] = Field(None, description="Personal interests or hobbies.") 98 | references: Optional[str] = Field(None, description="References or note about references.") 99 | additional_sections: Optional[Dict[str, List[str]]] = Field( 100 | None, description="Any additional sections in the resume." 101 | ) 102 | -------------------------------------------------------------------------------- /tests/benchmarks/2025-01-10-llama3.2-vision-11b-instructor-results.md: -------------------------------------------------------------------------------- 1 | ## Benchmark Results (model=llama3.2-vision:11b, date=2025-01-10) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
Domain Response Model Sample Response JSON
document.invoice Invoice
{
"invoice_id": null,
"period_start": null,
"period_end": null,
"invoice_issue_date": null,
"invoice_due_date": null,
"order_id": null,
"customer_id": null,
"issuer": null,
"issuer_address": null,
"customer": null,
"customer_email": null,
"customer_phone": null,
"customer_billing_address": null,
"customer_shipping_address": null,
"items": null,
"subtotal": null,
"tax": null,
"total": null,
"currency": null,
"notes": null
}
document.receipt Receipt
document.resume Resume
document.us-drivers-license USDriversLicense
document.utility-bill UtilityBill
{
"account_number": null,
"date_mailed": null,
"service_for": null,
"service_address": {
"street": null,
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": null,
"amount_due": 88.14,
"previous_balance": null,
"payment_received": null,
"current_charges": null,
"breakdown_of_charges": [
{
"description": "Electricity",
"amount": 50.0,
"usage": null,
"rate": null
},
{
"description": "Water",
"amount": 20.0,
"usage": null,
"rate": null
}
],
"payment_options": [
"Check",
"Online Payment"
],
"contact_information": {
"phone_number": "+1-800-123-4567"
}
}
document.w2-form W2Form
{
"control_number": null,
"ein": null,
"ssn": null,
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": null,
"city": null,
"state": null,
"zip_code": null
},
"employer_name": null,
"employer_address": {
"street": null,
"city": null,
"state": null,
"zip_code": null
},
"wages_tips_other_compensation": 0.0,
"federal_income_tax_withheld": 0.0,
"social_security_wages": 0.0,
"social_security_tax_withheld": 0.0,
"medicare_wages_and_tips": 0.0,
"medicare_tax_withheld": 0.0,
"tax_year": null
}
aerospace.remote-sensing RemoteSensing
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": {
"provider_service_number": null,
"precertification_number": null
},
"member_information": {
"member_name": "John Doe",
"member_id": "1234567890",
"group_number": "ABC123"
},
"pharmacy_plan": {
"rx_bin": null,
"rx_pcn": null,
"rx_grp": null,
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "Blue Cross Blue Shield",
"network": "PPO"
},
"coverage": {
"office_visit": null,
"specialist_visit": null,
"urgent_care": null,
"emergency_room": null,
"inpatient_hospital": null
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
media.tv-news TVNews
{
"description": "Biden criticises Netanyahu in an interview",
"chyron": null,
"network": "CNN",
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": null,
"teams": [
{
"name": "GB",
"score": null
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "14:56",
"possession_team": "GB",
"down": "1st",
"distance": null,
"yard_line": null,
"network": null,
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": null,
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": null,
"network": "ESPN",
"is_shown": true
}
60 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/document/form_work_order.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from decimal import Decimal 3 | from enum import Enum 4 | from typing import Optional, List, Union 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | 9 | class CompanyInfo(BaseModel): 10 | """Company information header""" 11 | name: Optional[str] = Field(None, description="Name of the company") 12 | address: Optional[str] = Field(None, description="Street address of the company") 13 | suite: Optional[str] = Field(None, description="Suite or unit number") 14 | city: Optional[str] = Field(None, description="City name") 15 | state: Optional[str] = Field(None, description="State abbreviation") 16 | zip_code: Optional[str] = Field(None, description="ZIP/Postal code") 17 | phone: Optional[str] = Field(None, description="Company phone number") 18 | email: Optional[str] = Field(None, description="Company email address") 19 | 20 | 21 | class ServiceType(str, Enum): 22 | """Types of services offered""" 23 | # Automotive Services 24 | LUBRICATE = "Lubricate" 25 | CHANGE_OIL = "Change Oil" 26 | TRANSMISSION_SERVICE = "Transmission Service" 27 | BATTERY_SERVICE = "Battery Replacement / Check" 28 | DIFFERENTIAL_SERVICE = "Differential Service" 29 | FLAT_TIRE_REPAIR = "Flat Tire Repair" 30 | ADAS_CALIBRATION = "ADAS Calibration" 31 | WIPER_REPLACEMENT = "Wiper Replacement" 32 | CAR_WASH = "Car Wash" 33 | POLISH_DETAILING = "Polish / Detailing" 34 | TIRE_SERVICE = "Tire Rotation and Alignment" 35 | BRAKE_SERVICE = "Brake Inspection / Replacement" 36 | COOLANT_FLUSH = "Coolant System Flush" 37 | AIR_FILTER = "Air Filter Replacement" 38 | SPARK_PLUG = "Spark Plug Replacement" 39 | EXHAUST_CHECK = "Exhaust System Check" 40 | ELECTRONICS = "Electronics" 41 | SUSPENSION_CHECK = "Suspension and Steering Check" 42 | 
BATTERY_CHECK = "Electric/Hybrid Vehicle Battery Svc" 43 | 44 | # Landscaping Services 45 | AERATION = "Aeration" 46 | BORDERS_EDGING = "Borders / Edging" 47 | FERTILIZATION = "Fertilization" 48 | GARDEN_DESIGN = "Garden Design" 49 | GARDEN_INSTALLATION = "Garden Installation" 50 | HEDGE_TRIMMING = "Hedge Trimming" 51 | IRRIGATION_INSTALL = "Irrigation Installation" 52 | IRRIGATION_MAINTENANCE = "Irrigation Maintenance" 53 | LANDSCAPE_LIGHTING = "Landscape Lighting Installation" 54 | MOWING = "Mowing" 55 | TRIM_MOWING = "Trim Mowing" 56 | MULCHING = "Mulching" 57 | PATIO_DECK_CONSTRUCTION = "Patio / Deck Construction" 58 | PATIO_DECK_MAINTENANCE = "Patio / Deck Maintenance" 59 | PAVING = "Paving / Walkway Installation" 60 | RETAINING_WALL = "Retaining Wall Construction" 61 | SEASONAL_CLEANUP = "Seasonal Clean-up" 62 | SOD_INSTALLATION = "Sod Installation" 63 | TREE_PRUNING = "Tree Pruning" 64 | TREE_TRIMMING = "Tree Trimming" 65 | WEED_CONTROL = "Weed Control" 66 | WINTERIZING = "Winterizing" 67 | 68 | # Electrical Services 69 | ELECTRICAL_REPAIR = "Electrical Repair" 70 | CIRCUIT_BREAKER = "Circuit Breaker Service" 71 | FAN_INSTALLATION = "Fan Installation" 72 | OUTLET_REPAIR = "Outlet Repair" 73 | WIRING_INSTALLATION = "Wiring Installation" 74 | 75 | 76 | class PriorityLevel(str, Enum): 77 | """Priority levels for work orders""" 78 | LOW = "Low" 79 | MEDIUM = "Medium" 80 | HIGH = "High" 81 | 82 | 83 | class VehicleInfo(BaseModel): 84 | """Vehicle information for automotive work orders""" 85 | vin: Optional[str] = Field(None, description="Vehicle Identification Number") 86 | make_model: Optional[str] = Field(None, description="Make and model of the vehicle") 87 | year: Optional[int] = Field(None, description="Year of the vehicle") 88 | odometer: Optional[float] = Field(None, description="Current odometer reading") 89 | license_number: Optional[str] = Field(None, description="License plate number") 90 | state: Optional[str] = Field(None, description="State of 
registration") 91 | motor_number: Optional[str] = Field(None, description="Motor/Engine number") 92 | 93 | 94 | class LineItem(BaseModel): 95 | """Line item for materials, parts, or labor""" 96 | description: Optional[str] = Field(None, description="Description of the item or service") 97 | quantity: Optional[Decimal] = Field(None, description="Quantity of the item") 98 | price_per_unit: Optional[Decimal] = Field(None, description="Price per unit") 99 | amount: Optional[Decimal] = Field(None, description="Total amount (quantity * price_per_unit)") 100 | part_number: Optional[str] = Field(None, description="Part number if applicable") 101 | 102 | 103 | class WorkOrder(BaseModel): 104 | """Unified schema for all types of work orders""" 105 | 106 | # Company Information 107 | company_info: Optional[CompanyInfo] = Field(None, description="Company information") 108 | 109 | # Basic Information 110 | order_number: Optional[str] = Field(None, description="Work order number/identifier") 111 | order_type: Optional[str] = Field(None, description="Type of work order (Automotive/Landscaping/Electrical)") 112 | 113 | # Client Information 114 | client_name: Optional[str] = Field(None, description="Name of the client") 115 | client_phone: Optional[str] = Field(None, description="Client's phone number") 116 | client_email: Optional[str] = Field(None, description="Client's email address") 117 | service_location: Optional[str] = Field(None, description="Address where service will be performed") 118 | 119 | # Timing Information 120 | order_date: Optional[datetime] = Field(None, description="Date and time the order was created") 121 | start_date: Optional[datetime] = Field(None, description="Expected start date") 122 | end_date: Optional[datetime] = Field(None, description="Expected end date") 123 | date_completed: Optional[datetime] = Field(None, description="Actual completion date") 124 | 125 | # Vehicle Information (for automotive work orders) 126 | vehicle_info: 
Optional[VehicleInfo] = Field(None, description="Vehicle information for automotive work orders") 127 | 128 | # Service Details 129 | services_requested: Optional[List[ServiceType]] = Field(default_factory=list, description="List of services requested") 130 | job_description: Optional[str] = Field(None, description="Detailed description of work to be performed") 131 | priority_level: Optional[PriorityLevel] = Field(None, description="Priority level of the work order") 132 | 133 | # Cost Information 134 | materials: Optional[List[LineItem]] = Field(default_factory=list, description="List of materials used") 135 | labor_items: Optional[List[LineItem]] = Field(default_factory=list, description="List of labor charges") 136 | materials_total: Optional[Decimal] = Field(None, description="Total cost of materials") 137 | labor_total: Optional[Decimal] = Field(None, description="Total cost of labor") 138 | subtotal: Optional[Decimal] = Field(None, description="Subtotal before tax") 139 | tax_rate: Optional[Decimal] = Field(None, description="Tax rate as a percentage") 140 | tax_amount: Optional[Decimal] = Field(None, description="Calculated tax amount") 141 | total_amount: Optional[Decimal] = Field(None, description="Final total amount") 142 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/contrib/healthcare/pathology_report.py: -------------------------------------------------------------------------------- 1 | from datetime import date, datetime 2 | from typing import List, Optional 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class Addendum(BaseModel): 8 | addendum_date: Optional[datetime] = Field(None, description="Date and time the addendum/amendment was created") 9 | reason: Optional[str] = Field(None, description="Reason for the addendum (e.g., corrected data, additional info)") 10 | text: Optional[str] = Field(None, description="Content or changes described in the addendum") 11 | 12 | 13 | class 
PatientInformation(BaseModel): 14 | patient_name: Optional[str] = Field(None, description="Full name of the patient") 15 | patient_id: Optional[str] = Field(None, description="Medical record number or patient identifier") 16 | date_of_birth: Optional[date] = Field(None, description="Patient's date of birth") 17 | gender: Optional[str] = Field(None, description="Patient's gender") 18 | clinical_history: Optional[str] = Field(None, description="Relevant clinical history") 19 | 20 | 21 | class FacilityInformation(BaseModel): 22 | facility_name: Optional[str] = Field(None, description="Name of the medical facility") 23 | facility_address: Optional[str] = Field(None, description="Facility address or contact information") 24 | 25 | 26 | class GrossExamination(BaseModel): 27 | description: Optional[str] = Field(None, description="Narrative description of the specimen at gross examination") 28 | specimen_measurements: Optional[List[str]] = Field( 29 | None, description="List of measurement strings (e.g. 
'3 x 2 x 1 cm')" 30 | ) 31 | specimen_weight: Optional[float] = Field(None, description="Weight in grams if relevant") 32 | gross_margins: Optional[str] = Field(None, description="Gross margin description (if applicable)") 33 | cassette_details: Optional[List[str]] = Field( 34 | None, description="Details of cassettes used, how tissue was sectioned" 35 | ) 36 | gross_findings: Optional[List[str]] = Field(None, description="Key gross findings (e.g., necrosis, hemorrhage)") 37 | 38 | 39 | class MicroscopicExamination(BaseModel): 40 | description: Optional[str] = Field(None, description="Detailed microscopic or histological findings") 41 | cellular_features: Optional[str] = Field(None, description="Cellular characteristics (atypia, pleomorphism, etc.)") 42 | architectural_features: Optional[str] = Field(None, description="Architecture (glandular, papillary, etc.)") 43 | microscopic_margins: Optional[str] = Field(None, description="Microscopic margin status or description") 44 | lymphovascular_invasion: Optional[bool] = Field( 45 | None, description="Presence/absence of lymphovascular invasion (LVI)" 46 | ) 47 | perineural_invasion: Optional[bool] = Field(None, description="Presence/absence of perineural invasion (PNI)") 48 | additional_findings: Optional[List[str]] = Field(None, description="Other significant findings") 49 | 50 | 51 | class SpecimenDiagnosis(BaseModel): 52 | diagnosis_text: Optional[str] = Field(None, description="Summary of the pathological diagnosis or interpretation") 53 | additional_notes: Optional[str] = Field(None, description="Any extra notes or classification details") 54 | 55 | 56 | class ImmunohistochemistryResults(BaseModel): 57 | marker_name: Optional[str] = Field(None, description="Name of IHC marker (e.g., ER, PR, CD20)") 58 | result: Optional[str] = Field(None, description="Interpretation (Positive, Negative, etc.)") 59 | percentage: Optional[float] = Field( 60 | None, description="Approx. 
percentage of positive cells if known (value between 0.0 and 1.0)" 61 | ) 62 | intensity: Optional[str] = Field(None, description="Intensity of staining (e.g., 1+, 2+, 3+)") 63 | pattern: Optional[str] = Field(None, description="Staining pattern (nuclear, cytoplasmic, membranous)") 64 | control_validity: Optional[bool] = Field(None, description="Whether control stain was valid") 65 | 66 | 67 | class MolecularStudies(BaseModel): 68 | test_name: Optional[str] = Field(None, description="Name of molecular test (e.g., EGFR, KRAS, BRAF)") 69 | result: Optional[str] = Field(None, description="Result or interpretation (e.g., Mutated, Wild-type, Negative)") 70 | methodology: Optional[str] = Field(None, description="Method used (PCR, NGS, FISH, etc.)") 71 | interpretation: Optional[str] = Field(None, description="Clinical or pathological significance if known") 72 | 73 | 74 | class SpecimenInformation(BaseModel): 75 | specimen_id: Optional[str] = Field( 76 | None, description="Unique identifier or label for the specimen (e.g., 'Specimen A')" 77 | ) 78 | specimen_source: Optional[str] = Field( 79 | None, description="Combined anatomic site/type (e.g., 'Biopsy of right lung')" 80 | ) 81 | collection_date: Optional[datetime] = Field(None, description="Date/time of specimen collection") 82 | received_date: Optional[datetime] = Field(None, description="Date/time specimen was received in lab") 83 | preservation: Optional[str] = Field(None, description="Preservation method if relevant (e.g., Formalin)") 84 | 85 | gross_examination: Optional[List[GrossExamination]] = Field(None, description="List of gross exam details") 86 | microscopic_examination: Optional[List[MicroscopicExamination]] = Field( 87 | None, description="List of microscopic exam details" 88 | ) 89 | 90 | immunohistochemistry: Optional[List[ImmunohistochemistryResults]] = Field( 91 | None, description="Any IHC results for this specimen" 92 | ) 93 | molecular_studies: Optional[List[MolecularStudies]] = Field( 94 | 
None, description="Any molecular tests on this specimen" 95 | ) 96 | 97 | diagnosis: Optional[SpecimenDiagnosis] = Field( 98 | None, description="Diagnosis/interpretation specific to this specimen" 99 | ) 100 | 101 | 102 | class PathologyReport(BaseModel): 103 | accession_number: Optional[str] = Field(None, description="Unique report identifier (accession)") 104 | report_type: Optional[str] = Field( 105 | None, description="Type of laboratory report (e.g., 'Surgical Pathology', 'Cytology')" 106 | ) 107 | report_date: Optional[datetime] = Field(None, description="Date/time report was generated or finalized") 108 | report_status: Optional[str] = Field(None, description="Status of the report") 109 | 110 | patient: Optional[PatientInformation] = Field(None, description="Patient demographics (all optional)") 111 | facility: Optional[FacilityInformation] = Field(None, description="Facility/lab info, simplified") 112 | 113 | specimens: Optional[List[SpecimenInformation]] = Field( 114 | None, description="List of all specimens examined in this report" 115 | ) 116 | 117 | integrated_diagnosis: Optional[str] = Field( 118 | None, description="An overall or integrated interpretation across specimens" 119 | ) 120 | clinical_notes: Optional[str] = Field(None, description="High-level clinical notes, if relevant") 121 | comments: Optional[str] = Field(None, description="General comments, disclaimers, or remarks") 122 | 123 | pathologist_name: Optional[str] = Field(None, description="Name of the reporting laboratory professional") 124 | 125 | addenda: Optional[List[Addendum]] = Field(None, description="Any amendments or addenda appended to this report") 126 | -------------------------------------------------------------------------------- /vlmrun/hub/schemas/document/utility_bill.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field 
# One itemized charge line on the bill.
class ChargeDetail(BaseModel):
    description: Optional[str] = Field(
        default=None, description="Description of the specific charge or service."
    )
    amount: Optional[float] = Field(
        default=None, description="Amount charged for the specific service or item."
    )
    currency: Optional[str] = Field(
        default=None,
        description="3-letter currency code for the amount, if different from the main bill currency.",
    )
    usage: Optional[str] = Field(
        default=None, description="Usage details, such as '31 kWh' or '10 CCF'."
    )
    rate: Optional[float] = Field(
        default=None, description="Rate per unit for the service or item."
    )
    period_start: Optional[date] = Field(
        default=None, description="Start date for this specific charge, if applicable."
    )
    period_end: Optional[date] = Field(
        default=None, description="End date for this specific charge, if applicable."
    )


# Postal address, reused for provider, service, billing and remittance addresses.
class Address(BaseModel):
    street: Optional[str] = Field(
        default=None, description="Street address, including apartment or suite number."
    )
    city: Optional[str] = Field(default=None, description="City of the address.")
    state: Optional[str] = Field(
        default=None, description="State, province, or region of the address."
    )
    zip_code: Optional[str] = Field(
        default=None, description="Postal or ZIP code of the address."
    )
    country: Optional[str] = Field(default=None, description="Country of the address.")


# Ways to reach the utility provider.
class ProviderContactInfo(BaseModel):
    phone_numbers: Optional[List[str]] = Field(
        default=None, description="List of contact phone numbers for the provider."
    )
    email_addresses: Optional[List[str]] = Field(
        default=None, description="List of contact email addresses for the provider."
    )
    websites: Optional[List[str]] = Field(
        default=None,
        description="List of relevant websites for the provider (e.g., customer portal, payment page).",
    )
    customer_service_hours: Optional[str] = Field(
        default=None, description="Operating hours for customer service."
    )


# A single usage figure for one period; both fields are required.
# The unit of measurement is carried by the enclosing UsageSummary.
class UsageDetail(BaseModel):
    period_description: str = Field(
        ...,
        description="Description of the period (e.g., 'Current Period', 'Last Period', 'Year Ago Period').",
    )
    usage_value: float = Field(..., description="The usage value for that period.")


# Usage figures for one service type, sharing a common unit.
class UsageSummary(BaseModel):
    service_type: str = Field(
        ..., description="Type of service (e.g., 'Electric', 'Gas', 'Water')."
    )
    unit: str = Field(
        ...,
        description="Unit of measurement for the usage_value (e.g., 'kWh', 'Therms', 'Gallons').",
    )
    details: List[UsageDetail] = Field(
        ..., description="List of usage details for different periods or categories."
    )
    meter_number: Optional[str] = Field(
        default=None, description="Identifier for the meter associated with this usage."
    )
    reading_type: Optional[str] = Field(
        default=None, description="Type of reading (e.g., 'Actual', 'Estimated')."
    )


# Top-level schema for a utility bill / statement. All fields are optional.
# NOTE: documented with `#` comments rather than docstrings, because a class
# docstring on a pydantic model becomes part of its JSON schema.
class UtilityBill(BaseModel):
    # --- Provider ---
    provider_name: Optional[str] = Field(
        default=None, description="Name of the utility provider ."
    )
    provider_address: Optional[Address] = Field(
        default=None, description="Address of the utility provider."
    )
    provider_logo_description: Optional[str] = Field(
        default=None,
        description="Textual description of the provider's logo if present.",
    )
    contact_information: Optional[ProviderContactInfo] = Field(
        default=None, description="Contact details for the utility provider."
    )

    # --- Account and statement ---
    account_number: Optional[str] = Field(
        default=None, description="The unique identifier for the utility account."
    )
    statement_date: Optional[date] = Field(
        default=None,
        description="The date the bill or statement was issued (previously date_mailed).",
    )
    statement_title: Optional[str] = Field(
        default=None,
        description="Title of the statement (e.g., 'ENERGY STATEMENT', 'Water Bill').",
    )
    invoice_number: Optional[str] = Field(
        default=None,
        description="Invoice or bill number, if different from account number.",
    )

    # --- Customer ---
    service_for: Optional[str] = Field(
        default=None,
        description="Name of the entity or person the service is billed to.",
    )
    service_address: Optional[Address] = Field(
        default=None,
        description="The address where the utility services are provided.",
    )
    billing_address: Optional[Address] = Field(
        default=None,
        description="The mailing address for the bill, if different from service address.",
    )

    # --- Billing period and due date ---
    billing_period_start: Optional[date] = Field(
        default=None,
        description="The start date of the billing period covered by this bill.",
    )
    billing_period_end: Optional[date] = Field(
        default=None,
        description="The end date of the billing period covered by this bill.",
    )
    date_due: Optional[date] = Field(
        default=None, description="The due date for bill payment."
    )

    # --- Financial summary ---
    currency: Optional[str] = Field(
        default=None,
        description="3-letter currency code for amounts on the bill (e.g., USD, CAD, EUR).",
    )
    amount_due: Optional[float] = Field(
        default=None, description="The total amount payable by the due date."
    )
    previous_balance: Optional[float] = Field(
        default=None,
        description="The balance carried over from the previous billing cycle.",
    )
    previous_unpaid_balance: Optional[float] = Field(
        default=None,
        description="Balance remaining from previous periods after payments were applied.",
    )
    payment_received: Optional[float] = Field(
        default=None,
        description="Total payments received and applied since the last bill.",
    )
    adjustments_credits: Optional[float] = Field(
        default=None,
        description="Total adjustments or credits applied during this billing period.",
    )
    current_charges: Optional[float] = Field(
        default=None, description="Total new charges for the current billing cycle."
    )

    # --- Itemized charges ---
    breakdown_of_charges: Optional[List[ChargeDetail]] = Field(
        default=None,
        description="Itemized list of charges, services, taxes, and fees with descriptions and amounts.",
    )

    # --- Usage ---
    usage_summaries: Optional[List[UsageSummary]] = Field(
        default=None,
        description="Summaries of utility usage, possibly comparing different periods or types of service.",
    )

    # --- Payment ---
    payment_options: Optional[List[str]] = Field(
        default=None,
        description="Accepted methods for bill payment (e.g., 'Online at provider.com/pay', 'Mail-in check').",
    )
    payment_instructions: Optional[str] = Field(
        default=None,
        description="Specific instructions for making a payment (e.g., 'Return this portion with your payment').",
    )
    payment_remittance_address: Optional[Address] = Field(
        default=None, description="Address to mail payments to."
    )

    # --- Miscellaneous ---
    important_messages: Optional[List[str]] = Field(
        default=None,
        description="List of important messages, announcements, or regulatory notices on the bill.",
    )
    page_information: Optional[str] = Field(
        default=None,
        description="Page numbering or other page-specific information (e.g., 'Page 1 of 2').",
    )
    notes: Optional[str] = Field(
        default=None,
        description="General notes or miscellaneous information on the bill.",
    )
**Descriptive Field Names**: Use clear, descriptive, and `snake_case` field names, along with a short `description` field that explains the field's purpose. This is critical because the model relies on the field name and description to decide which content in the input each field should be extracted from. 30 | 31 | Good example: 32 | ```python 33 | class CustomerInvoice(BaseModel): 34 | invoice_number: str = Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.") 35 | ``` 36 | 37 | Bad example: 38 | ```python 39 | class CustomerInvoice(BaseModel): 40 | invoice_number: str = Field(..., description="The invoice number.") 41 | ``` 42 | 43 | 5. **Field Metadata**: 44 | - Use the `Field` class to provide: 45 | - `default`: If applicable (e.g., `Field(None, ...)`). 46 | - `description`: Include a short, clear explanation of the field’s purpose. (e.g., `Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.")`) 47 | - Other constraints: For validation (e.g., `max_length`, `pattern`; note that `pattern` was called `regex` in Pydantic v1). 48 | - Validation: Add custom validators where necessary to enforce domain-specific rules. 49 | 50 | 6. **Nested Models**: Use nested Pydantic models for complex structures (e.g., lists of dictionaries). 51 | 52 | ```python 53 | class CustomerInvoice(BaseModel): 54 | invoice_number: str = Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.") 55 | items: list[Item] = Field(..., description="A list of items in the invoice.") 56 | ``` 57 | 58 | 7. **Enums**: Use enums or `Literal` for fixed choices. 59 | 60 | Using `Enum`: 61 | ```python 62 | class Status(Enum): 63 | pending = "pending" 64 | paid = "paid" 65 | cancelled = "cancelled" 66 | 67 | class CustomerInvoice(BaseModel): 68 | ...
69 | status: Status = Field(..., description="The status of the invoice, which can be either 'pending', 'paid', or 'cancelled'.") 70 | ``` 71 | 72 | Using `Literal`: 73 | ```python 74 | class CustomerInvoice(BaseModel): 75 | status: Literal["pending", "paid", "cancelled"] = Field(..., description="The status of the invoice, which can be either 'pending', 'paid', or 'cancelled'.") 76 | ``` 77 | 78 | ### ✅ Schema Review Checklist 79 | 80 | Before submitting your schema: 81 | 82 | - [ ] **Field Types**: Ensure all fields are strongly-typed. 83 | - [ ] **Field Metadata**: Check that all fields include descriptions and constraints where applicable. 84 | - [ ] **Optional Fields**: Use `Optional[...]` to define optional fields. 85 | - [ ] **Validation**: Add custom validators for domain-specific rules. 86 | - [ ] **Reusability**: Use nested models for complex types and avoid redundancy. 87 | - [ ] **Tests**: Provide unit tests to validate the schema against valid and invalid data. 88 | 89 | ### 👩‍💻 Adding a New Schema to the Hub 90 | 91 | 1. **Create a new schema file**: Create a new file in the [`schemas/contrib`](../vlmrun/hub/schemas/contrib) directory, under the appropriate industry and use case (e.g., `schemas/contrib/retail/ecommerce_product_caption.py`). Follow the [Schema Guidelines](#✏️-guidelines-for-writing-a-schema) to write the schema. 92 | 93 | 2. **Add sample image, prompt and schema reference in `catalog.yaml`:** Add a sample image for the schema, a prompt that can be used with VLMs to appropriately extract the JSON, and a reference to the schema in the [`schemas/contrib/catalog.yaml`](../vlmrun/hub/schemas/contrib/catalog.yaml) file. You can also refer to the [Catalog Specification Guidelines](./catalog-spec.yaml) for more information on the catalog format. 94 | 95 | Example: 96 | ```yaml 97 | - domain: media.nfl-game-state 98 | schema: vlmrun.hub.schemas.contrib.media.nfl_game_state.NFLGameState 99 | prompt: "You are a detail-oriented NFL Game Analyst.
Extract all the relevant game state information from the video feed or screenshot as accurately as possible." 100 | description: "NFL game state extraction system that processes game footage or screenshots to extract structured information including teams, scores, game clock, possession, and other relevant game state details." 101 | sample_data: "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png" 102 | metadata: 103 | supported_inputs: ["image", "video"] 104 | tags: ["media", "sports"] 105 | ``` 106 | 107 | 3. **Test the schema against the sample data**: Run the following test to ensure the schema is working as expected. Let's say you just added the above schema with `domain=media.nfl-game-state`. 108 | 109 | You can run: 110 | ```bash 111 | pytest -sv tests/test_instructor.py -k test_instructor_hub_sample --domain media.nfl-game-state 112 | ``` 113 | 114 | This will download the sample data from the URL and call [Instructor](https://github.com/jxnl/instructor/) with `gpt-4o-mini` to generate a JSON output against the schema. It will then validate the JSON output against the schema and print the output to the console. 115 | 116 | You will see the output in the console. 117 | Example: 118 | ```bash 119 | { 120 | "description": null, 121 | "teams": [ 122 | { 123 | "name": "Green Bay Packers", 124 | "score": 0 125 | }, 126 | { 127 | "name": "Arizona Cardinals", 128 | "score": 7 129 | } 130 | ], 131 | "status": "in_progress", 132 | "quarter": 2, 133 | "clock_time": "12:12", 134 | "possession_team": "Green Bay Packers", 135 | "down": "2nd", 136 | "distance": 10, 137 | "yard_line": -10, 138 | "network": "NBC", 139 | "is_shown": true 140 | } 141 | ``` 142 | 143 | > [!NOTE] 144 | > You can optionally change the provider and model to test against different models. 
# NOTE: the models below are documented with `#` comments rather than class
# docstrings, because pydantic emits a class docstring as the model's
# JSON-schema description, which would change the schema.


# Postal address, reused for issuer, billing and shipping addresses.
class Address(BaseModel):
    street: Optional[str] = Field(None, description="Street address")
    city: Optional[str] = Field(None, description="City")
    state: Optional[str] = Field(None, description="State, province, or region")
    postal_code: Optional[str] = Field(None, description="Postal or ZIP code")
    country: Optional[str] = Field(None, description="Country")


# Bank account details printed on the invoice for payment by transfer.
class BankDetails(BaseModel):
    bank_name: Optional[str] = Field(None, description="Name of the bank")
    account_holder_name: Optional[str] = Field(None, description="Name of the account holder")
    account_number: Optional[str] = Field(None, description="Bank account number")
    routing_number: Optional[str] = Field(None, description="Bank routing number (e.g., ABA for US)")
    bsb_number: Optional[str] = Field(None, description="Branch Sort Code (BSB) or similar local bank code")
    iban: Optional[str] = Field(None, description="International Bank Account Number (IBAN)")
    swift_bic: Optional[str] = Field(None, description="SWIFT/BIC code")


# One line item (product or service) on the invoice.
class Item(BaseModel):
    description: Optional[str] = Field(None, description="Description or name of the item or service")
    quantity: Optional[float] = Field(None, description="Quantity of the item, which can be a float for hours or partial units")
    unit_price: Optional[float] = Field(None, description="Unit price or rate of the item")

    # This is typically quantity * unit_price.
    total_price: Optional[float] = Field(None, description="Total price for this line item, typically quantity multiplied by unit price, before item-specific adjustments/taxes")

    # Fixed wording: currency codes such as USD/EUR are three *letters*, not
    # digits; "3-digit" contradicted the examples given on Invoice.currency.
    currency: Optional[str] = Field(None, description="3-letter currency code for this item, if it differs from the main invoice currency")

    item_tax_details: Optional[str] = Field(None, description="Tax details or rate specific to this item (e.g., 'VAT 0%[1]', 'Sales Tax Exempt')")
    item_adjustment_details: Optional[str] = Field(None, description="Adjustment or discount details specific to this item (e.g., '10% off promo', 'Volume discount')")
    # If item-level tax/adjustment amounts are needed, they can be added:
    # item_tax_amount: Optional[float] = Field(None, description="Tax amount for this specific item")
    # item_adjustment_amount: Optional[float] = Field(None, description="Adjustment/discount amount for this specific item")


# Top-level schema for an invoice. Every field is optional so that validation
# does not fail when the source document omits a section.
class Invoice(BaseModel):
    # Core Invoice Information
    invoice_id: Optional[str] = Field(None, description="Unique invoice identifier or number")
    invoice_issue_date: Optional[date] = Field(None, description="Date when the invoice was issued")
    invoice_due_date: Optional[date] = Field(None, description="Date by which the invoice payment is due")
    period_start: Optional[date] = Field(None, description="Start date of the billing period covered by the invoice, if applicable")
    period_end: Optional[date] = Field(None, description="End date of the billing period covered by the invoice, if applicable")

    # Related Identifiers
    order_id: Optional[str] = Field(None, description="Unique order identifier related to this invoice, if applicable")
    customer_id: Optional[str] = Field(None, description="Unique customer identifier, if applicable")

    # Issuer Details
    issuer_name: Optional[str] = Field(None, description="Name of the invoice issuer (company or individual)")
    issuer_address: Optional[Address] = Field(None, description="Address of the invoice issuer")
    issuer_email: Optional[str] = Field(None, description="Email address of the invoice issuer")
    issuer_phone: Optional[str] = Field(None, description="Phone number of the invoice issuer")
    issuer_vat_id: Optional[str] = Field(None, description="VAT identification number or other tax ID of the issuer")
    issuer_website: Optional[str] = Field(None, description="Website of the invoice issuer")

    # Customer/Recipient Details
    customer_name: Optional[str] = Field(None, description="Name of the invoice recipient (company or individual)")
    customer_email: Optional[str] = Field(None, description="Email address of the recipient")
    customer_phone: Optional[str] = Field(None, description="Phone number of the recipient")
    customer_billing_address: Optional[Address] = Field(None, description="Billing address of the recipient")
    customer_shipping_address: Optional[Address] = Field(None, description="Shipping address of the recipient, if different from billing")
    customer_vat_id: Optional[str] = Field(None, description="VAT identification number or other tax ID of the customer")

    # Invoice Line Items
    items: Optional[List[Item]] = Field(None, description="List of items or services detailed in the invoice")

    # Financial Summary
    # Fixed wording: "3-letter" (was "3-digit"), matching the USD/EUR examples.
    currency: Optional[str] = Field(None, description="Primary 3-letter currency code for the invoice amounts (e.g., USD, EUR)")
    subtotal: Optional[float] = Field(None, description="Total amount of all line items before any discounts, taxes, and shipping")

    discount_amount: Optional[float] = Field(None, description="Total discount amount applied to the invoice subtotal")
    discount_percentage: Optional[float] = Field(None, description="Overall discount percentage applied to the invoice")
    discount_description: Optional[str] = Field(None, description="Description of the discount applied (e.g., 'Early payment discount', 'Volume discount')")

    shipping_cost: Optional[float] = Field(None, description="Shipping and handling charges")
    shipping_description: Optional[str] = Field(None, description="Description of shipping charges or method")

    tax_amount: Optional[float] = Field(None, description="Total tax amount for the invoice")
    tax_percentage: Optional[float] = Field(None, description="Overall tax rate percentage applied to the taxable amount")
    tax_description: Optional[str] = Field(None, description="Description of the tax applied, such as type, rate, or jurisdiction")
    overall_tax_notes: Optional[str] = Field(None, description="Additional notes regarding taxes for the entire invoice (e.g., 'Tax to be paid on reverse charge basis')")

    total_amount: Optional[float] = Field(None, description="The final total amount of the invoice after all deductions and additions (subtotal - discounts + shipping + taxes)")
    amount_paid: Optional[float] = Field(None, description="Amount already paid by the customer towards this invoice")
    balance_due: Optional[float] = Field(None, description="Remaining amount due for payment (total_amount - amount_paid)")

    # Payment Information
    payment_terms: Optional[str] = Field(None, description="Payment terms and conditions (e.g., 'Net 30 days', 'Due upon receipt')")
    payment_instructions: Optional[str] = Field(None, description="Specific instructions for making payment")
    payment_link: Optional[str] = Field(None, description="A URL for online payment, if available")
    bank_details: Optional[BankDetails] = Field(None, description="Bank account details for payment transfers")

    # Miscellaneous
    notes: Optional[str] = Field(None, description="General notes, comments, or miscellaneous information on the invoice")
    terms_and_conditions: Optional[str] = Field(None, description="General terms and conditions related to the invoice or service")
    footer_text: Optional[str] = Field(None, description="Text appearing in the footer of the invoice (e.g., thank you message, company slogan)")
    page_information: Optional[str] = Field(None, description="Page numbering or other page-specific information (e.g., 'Page 1 of 1')")
    logo_url: Optional[str] = Field(None, description="URL of the company logo displayed on the invoice")
Domain Response Model Sample Response JSON
document.invoice Invoice
document.receipt Receipt
document.resume Resume
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "BRENDA LYNN",
"first_name": "BRENDA",
"middle_name": null,
"last_name": "LYNN",
"address": {
"street": "123 MAIN STREET",
"city": "HELENA",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'-06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 0.0,
"payment_received": -91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GN1851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "This satellite image shows a large port area with numerous docks, ships, and storage facilities. Adjacent to the port is a dense urban area with a grid-like street pattern. The coastline is visible with a beach area and a breakwater extending into the ocean. The image is clear with no cloud cover.",
"objects": [
"ships",
"docks",
"storage facilities",
"urban area",
"beach",
"breakwater",
"roads",
"railway",
"parking lots",
"marina"
],
"categories": [
"port",
"residential-area",
"beach",
"commercial-area",
"industrial-area",
"parking-lot",
"other"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": null,
"member_information": {
"member_name": "Member Name",
"member_id": "XY2123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": null,
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": "$15",
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
{
"description": "The product is a black Kindle Paperwhite with a 6.8\" display and adjustable warm light. It is shown being held in a person's hand with a book displayed on the screen.",
"rating": 95,
"name": "Kindle Paperwhite (8 GB)",
"brand": "Amazon",
"category": "Amazon Devices / Kindle",
"price": "$139.99",
"color": "Black"
}
media.tv-news TVNews
{
"description": "A female news anchor is reporting from a studio. The background is blurred and shows multiple screens. The chyron at the bottom of the screen displays the main headline and a secondary news item.",
"chyron": "Biden criticises Netanyahu in an interview\n• Alan Bates: Post Office would do 'anything to hide Horizon failures'",
"network": "BBC NEWS",
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": null,
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": null,
"down": "2nd",
"distance": 10,
"yard_line": null,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": "The Miami Heat are playing against the San Antonio Spurs in Game 1 of the NBA Finals.",
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}
"""Schema registry and catalog (YAML) models for the VLM Run hub."""

import hashlib
import importlib
import json
from functools import cached_property
from pathlib import Path
from typing import Dict, List, Literal, Optional, Tuple, Type, Union

from loguru import logger
from pydantic import BaseModel, Field, model_validator
from pydantic_yaml import parse_yaml_raw_as


class Registry:
    """A singleton registry for schemas.

    Schemas are loaded lazily from the catalog files the first time the
    registry is queried.

    Examples:
        >>> from vlmrun.hub.registry import registry
        >>> schema = registry["document.invoice"]
        >>> registry.list_schemas()
        ['document.invoice', 'document.receipt', ...]
    """

    _instance = None

    def __new__(cls):
        # Singleton: all state lives on the single shared instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._schemas = {}
            cls._instance._initialized = False
            cls._instance._schema_metadata = {}  # Store metadata when registering schemas
        return cls._instance

    @property
    def schemas(self) -> Dict[str, Type[BaseModel]]:
        """Lazily load schemas when first accessed."""
        if not self._initialized:
            self.load_schemas()
        return self._schemas

    def _extract_metadata(self, schema: "SchemaCatalogItem") -> dict:
        """Build the metadata dict stored alongside a registered schema.

        ``sample_data`` is normalized so that a single string becomes a
        one-element list; lists and ``None`` are passed through unchanged.
        """
        sample_data = schema.sample_data
        if isinstance(sample_data, str):
            sample_data = [sample_data]
        return {
            "description": schema.description,
            "supported_inputs": schema.metadata.supported_inputs if schema.metadata else None,
            "tags": schema.metadata.tags if schema.metadata else None,
            "sample_data": sample_data,
        }

    def _load_catalog(self, path: Path) -> None:
        """Parse the catalog at ``path`` and register every schema it lists."""
        catalog = SchemaCatalogYaml.from_yaml(path)
        for schema in catalog.schemas:
            metadata = self._extract_metadata(schema)
            self.register(schema.domain, schema.schema_class, metadata)
        logger.debug(f"Loaded schemas from {path}")

    def register(self, name: str, schema: Type[BaseModel], metadata: Optional[dict] = None) -> None:
        """Register a schema with the registry.

        Args:
            name: Domain identifier the schema is stored under.
            schema: A ``pydantic.BaseModel`` subclass.
            metadata: Optional metadata dict returned by ``get_domain_info``.

        Raises:
            ValueError: If ``schema`` is not a subclass of ``BaseModel``.
        """
        # ``issubclass`` raises TypeError for non-class arguments; check
        # ``isinstance(schema, type)`` first so callers always get ValueError.
        if not (isinstance(schema, type) and issubclass(schema, BaseModel)):
            raise ValueError(f"Schema {name} is not a subclass of BaseModel, type={type(schema)}")
        self._schemas[name] = schema
        if metadata:
            self._schema_metadata[name] = metadata

    def load_schemas(self, catalog_paths: Optional[Tuple[Union[str, Path], ...]] = None) -> None:
        """Load the default catalogs (once) plus any additional catalogs.

        Args:
            catalog_paths: Extra catalog files to load after the defaults.
                A single ``str``/``Path`` is accepted and treated as one path.

        Raises:
            FileNotFoundError: If an additional catalog file does not exist.
            Exception: Any error raised while loading the default catalog is
                logged and re-raised.
        """
        from vlmrun.hub.constants import VLMRUN_HUB_CATALOG_PATH, VLMRUN_HUB_PATH

        if not self._initialized:
            try:
                # Load default catalog
                self._load_catalog(VLMRUN_HUB_CATALOG_PATH)

                # Load contrib catalog if exists
                contrib_path = VLMRUN_HUB_PATH / "schemas/contrib/catalog.yaml"
                if contrib_path.exists():
                    try:
                        self._load_catalog(contrib_path)
                    except Exception as e:
                        # Contrib schemas are best-effort: log and keep going.
                        logger.error(f"Failed to load contrib schemas: {e}")

                self._initialized = True
            except Exception as e:
                logger.error(f"Failed to load default schemas: {e}")
                raise

        # Load additional catalogs if provided
        if catalog_paths is None:
            return
        if isinstance(catalog_paths, (str, Path)):
            # A bare path would otherwise be iterated character by character.
            catalog_paths = (catalog_paths,)
        for path in catalog_paths:
            path = Path(path)
            if not path.exists():
                raise FileNotFoundError(f"Catalog file not found: {path}")
            self._load_catalog(path)

    def get_domain_info(self, domain: str) -> dict:
        """Get metadata for a domain (empty dict if none was registered)."""
        # Trigger the lazy load like every other accessor does; otherwise the
        # metadata map is empty until ``schemas`` happens to be touched first.
        if not self._initialized:
            self.load_schemas()
        return self._schema_metadata.get(domain, {})

    def list_schemas(self) -> List[str]:
        """Return the registered domain names in sorted order."""
        return sorted(self.schemas.keys())

    def __contains__(self, name: str) -> bool:
        return name in self.schemas

    def __getitem__(self, name: str) -> Type[BaseModel]:
        try:
            return self.schemas[name]
        except KeyError:
            # ``from None`` hides the redundant inner KeyError from tracebacks.
            raise KeyError(
                f"Schema '{name}' not found. Available schemas: {', '.join(self.list_schemas())}"
            ) from None

    def __repr__(self) -> str:
        schemas = self.schemas
        lines = [f"Registry [schemas={len(schemas)}]"]
        lines.extend(f" {name} :: {schema.__name__}" for name, schema in sorted(schemas.items()))
        return "\n".join(lines)


registry = Registry()


class SchemaCatalogMetadata(BaseModel):
    """Represents the metadata for a schema in the catalog."""

    supported_inputs: Optional[List[Literal["image", "audio", "video", "document"]]] = Field(
        None, description="List of supported input types"
    )
    tags: Optional[List[str]] = Field(None, description="List of tags")


class SchemaCatalogItem(BaseModel):
    """Represents a single schema entry in the catalog."""

    domain: str = Field(..., description="Domain identifier for the schema")
    schema_path: str = Field(..., alias="schema", description="Fully qualified path to the schema class")
    prompt: str = Field(..., description="Task-specific prompt for the schema")
    description: Optional[str] = Field(None, description="Detailed description of the schema's purpose")
    sample_data: Optional[Union[str, List[str]]] = Field(None, description="URL to sample data for testing")
    version: Optional[str] = Field(None, description="Optional schema version in semver format")
    metadata: Optional[SchemaCatalogMetadata] = Field(None, description="Additional metadata including tags")

    @model_validator(mode="after")
    def validate_supported_inputs(self):
        """Reject supported-input values outside the allowed set."""
        # Raise explicitly instead of ``assert``: asserts are stripped under
        # ``python -O``, which would silently disable this validation.
        if self.metadata and self.metadata.supported_inputs:
            allowed = ("image", "audio", "video", "document")
            if not all(kind in allowed for kind in self.metadata.supported_inputs):
                raise ValueError("Supported inputs must be valid")
        return self

    @model_validator(mode="after")
    def validate_domain(self):
        """Require ``domain`` to look like ``category.name`` with both parts non-empty."""
        if "." not in self.domain:
            raise ValueError("Domain must be in format: category.name")
        category, name = self.domain.split(".", 1)
        if not (category and name):
            raise ValueError("Both category and name must be non-empty")
        return self

    @property
    def module_name(self) -> str:
        """Module portion of ``schema_path`` (everything before the last dot)."""
        return self.schema_path.rsplit(".", 1)[0]

    @property
    def class_name(self) -> str:
        """Class portion of ``schema_path`` (everything after the last dot)."""
        return self.schema_path.rsplit(".", 1)[1]

    @cached_property
    def schema_class(self) -> Type[BaseModel]:
        """Import and return the class referenced by ``schema_path``.

        Raises:
            ValueError: If the path has no module part, or the module or
                attribute cannot be imported.
        """
        module_name, dot, class_name = self.schema_path.rpartition(".")
        if not dot:
            # Without a dot there is no module to import from.
            raise ValueError(f"Unable to import {self.schema_path}: not a fully qualified class path")
        try:
            module = importlib.import_module(module_name)
            schema_class = getattr(module, class_name)
        except (ImportError, AttributeError) as e:
            raise ValueError(f"Unable to import {self.schema_path}: {e}") from e
        return schema_class

    @cached_property
    def schema_hash(self) -> str:
        """Compute a hash of the schema JSON (first 8 hex chars of its SHA-256)."""
        schema_json: dict = self.schema_class.model_json_schema()
        schema_hash: str = hashlib.sha256(json.dumps(schema_json).encode()).hexdigest()[:8]
        return schema_hash


class SchemaCatalogYaml(BaseModel):
    """Root model for the catalog.yaml file."""

    apiVersion: str = Field(..., description="API version of the catalog format")
    catalogs: Union[List[str], None] = Field(None, description="List of catalog files to include as references")
    schemas: List[SchemaCatalogItem] = Field(default_factory=list, description="List of schema entries")

    @classmethod
    def from_yaml(cls, yaml_path: Path) -> "SchemaCatalogYaml":
        """Parse a catalog file and inline any sub-catalogs it references.

        Raises:
            FileNotFoundError: If ``yaml_path`` does not exist.
        """
        yaml_path = Path(yaml_path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"Catalog file not found: {yaml_path}")
        # Read as UTF-8 explicitly so parsing does not depend on the locale.
        catalog: SchemaCatalogYaml = parse_yaml_raw_as(cls, yaml_path.read_text(encoding="utf-8"))
        catalog = catalog.load_catalogs(yaml_path.parent)
        return catalog

    def load_catalogs(self, subdirectory: Union[str, Path]) -> "SchemaCatalogYaml":
        """Unroll the catalog references into a single list of schemas.

        Sub-catalog paths are resolved relative to ``subdirectory``; their
        schemas are appended to ``self.schemas`` in place and ``self`` is
        returned.
        """
        if self.catalogs:
            for catalog in self.catalogs:
                logger.debug(f"Loading sub-catalog [catalog={catalog}, dir={subdirectory}]")
                catalog_path = Path(subdirectory) / catalog
                # Kept as an assert so the exception type seen by existing
                # callers (AssertionError) does not change.
                assert catalog_path.exists(), f"Catalog {catalog} not found in schemas"
                catalog_yaml = SchemaCatalogYaml.from_yaml(catalog_path)
                n_schemas = len(catalog_yaml.schemas)
                self.schemas.extend(catalog_yaml.schemas)
                logger.debug(f"Loaded sub-catalog [catalog={catalog}, n_schemas={n_schemas}]")
            logger.debug(f"Loaded full catalog [n_catalogs={len(self.catalogs)}, n_schemas={len(self.schemas)}]")
        return self
Domain Response Model Sample Response JSON
document.invoice Invoice
document.receipt Receipt
{
"receipt_id": null,
"transaction_date": "2021-01-26T22:36:22",
"merchant_name": "Walmart",
"merchant_address": {
"street": "8060 W TROPICAL PKWY",
"city": "LAS VEGAS",
"state": "NV",
"postal_code": "89149",
"country": null
},
"merchant_phone": null,
"cashier_name": "SARAH",
"register_number": "35",
"customer_name": null,
"customer_id": null,
"items": [
{
"description": "BOYS CREW",
"quantity": 1.0,
"unit_price": 9.48,
"total_price": 9.48
},
{
"description": "BOYS SOCKS",
"quantity": 1.0,
"unit_price": 6.97,
"total_price": 6.97
},
{
"description": "BOXER BRIEF",
"quantity": 1.0,
"unit_price": 10.98,
"total_price": 10.98
}
],
"subtotal": 27.43,
"tax": 2.3,
"total": 29.73,
"currency": "USD",
"payment_method": {
"type": "Debit",
"card_last_4": "****",
"card_type": null
},
"discount_amount": null,
"discount_description": null,
"tip_amount": null,
"return_policy": null,
"barcode": null,
"additional_charges": null,
"notes": null,
"others": null
}
document.resume Resume
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "Brenda Lynn Sample",
"first_name": "Brenda",
"middle_name": "Lynn",
"last_name": "Sample",
"address": {
"street": "123 MAIN STREET",
"city": "HELENA",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 0.0,
"payment_received": 91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GNI851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image captures a coastal urban area adjacent to a busy port. The scene includes residential neighborhoods, a marina, and extensive shipping facilities with numerous cargo containers visible. The waterway is active with vessels, indicating significant maritime activity.",
"objects": [
"residential buildings",
"marina",
"cargo containers",
"shipping docks",
"waterway",
"roads",
"beach"
],
"categories": [
"commercial-area",
"port",
"residential-area",
"water-treatment",
"beach"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": {
"provider_service_number": null,
"precertification_number": null
},
"member_information": {
"member_name": "Member Name",
"member_id": "XY2 123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": "HIOPT",
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": null,
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
{
"description": "The Kindle Paperwhite features a 6.8\" display and adjustable warm light for a comfortable reading experience. It is designed for easy portability and offers a sleek black finish.",
"rating": 85,
"name": "Kindle Paperwhite",
"brand": "Amazon",
"category": "Electronics / E-readers",
"price": "$139.99",
"color": "Black"
}
media.tv-news TVNews
{
"description": "A news anchor presenting a segment about President Biden criticizing Netanyahu in an interview.",
"chyron": "Biden criticises Netanyahu in an interview",
"network": "BBC News",
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": null,
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": "GB",
"down": "2nd",
"distance": 10,
"yard_line": -10,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": null,
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}
60 | --------------------------------------------------------------------------------