├── vlmrun
    └── hub
    │   ├── schemas
    │       ├── __init__.py
    │       ├── media
    │       │   └── tv_news.py
    │       ├── retail
    │       │   ├── product_catalog.py
    │       │   └── ecommerce_product_caption.py
    │       ├── contrib
    │       │   ├── media
    │       │   │   ├── nba_game_state.py
    │       │   │   └── nfl_game_state.py
    │       │   ├── document
    │       │   │   ├── india
    │       │   │   │   ├── pan_card.py
    │       │   │   │   └── aadhaar_card.py
    │       │   │   ├── us_passport.py
    │       │   │   ├── business_card.py
    │       │   │   ├── bank_check.py
    │       │   │   ├── request_for_proposal.py
    │       │   │   └── form_work_order.py
    │       │   ├── social
    │       │   │   └── twitter_card.py
    │       │   ├── accounting
    │       │   │   └── form_payslip.py
    │       │   ├── food
    │       │   │   └── nutrition_facts_label.py
    │       │   ├── logistics
    │       │   │   └── bill_of_lading.py
    │       │   ├── finance
    │       │   │   └── balance_sheet.py
    │       │   └── healthcare
    │       │   │   └── pathology_report.py
    │       ├── aerospace
    │       │   └── remote_sensing.py
    │       ├── healthcare
    │       │   ├── medical_insurance_card.py
    │       │   └── hipaa_release.py
    │       ├── document
    │       │   ├── us_drivers_license.py
    │       │   ├── receipt.py
    │       │   ├── bank_statement.py
    │       │   ├── resume.py
    │       │   ├── utility_bill.py
    │       │   └── invoice.py
    │       └── accounting
    │       │   └── w2_form.py
    │   ├── server
    │       ├── __init__.py
    │       ├── app.py
    │       ├── README.md
    │       └── routes.py
    │   ├── version.py
    │   ├── full-catalog.yaml
    │   ├── constants.py
    │   ├── dataset.py
    │   ├── utils.py
    │   └── registry.py
├── requirements
    ├── requirements.extra.txt
    ├── requirements.build.txt
    ├── requirements.server.txt
    ├── requirements.txt
    └── requirements.test.txt
├── MANIFEST.in
├── tests
    ├── test_imports.py
    ├── test_dataset.py
    ├── test_server.py
    ├── test_vlmrun.py
    ├── test_ollama.py
    ├── conftest.py
    ├── test_utils.py
    ├── test_catalog.py
    ├── test_openai.py
    ├── benchmarks
    │   ├── 2025-02-20-bsahane-Qwen2.5-VL-7B-Instruct-Q4_K_M_benxh-instructor-results.md
    │   ├── 2025-01-10-llama3.2-vision-11b-instructor-results.md
    │   ├── 2025-01-10-gemini-2.0-flash-exp-instructor-results.md
    │   └── 2025-01-10-gpt-4o-mini-2024-07-18-instructor-results.md
    ├── test_registry.py
    └── test_instructor.py
├── .env.template
├── docs
    ├── RELEASE.md
    ├── catalog-spec.yaml
    ├── CONTRIBUTING.md
    ├── CONTRIBUTING-SCHEMA.md
    └── SCHEMA-GUIDELINES.md
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── config.yaml
    │   └── documentation.yaml
    ├── workflows
    │   ├── ci.yml
    │   └── python-publish.yml
    └── PULL_REQUEST_TEMPLATE
    │   └── schema-request.yaml
├── .pre-commit-config.yaml
├── makefiles
    └── Makefile.admin.mk
├── scripts
    └── bump_version.py
├── Makefile
├── pyproject.toml
└── .gitignore


/vlmrun/hub/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vlmrun/hub/server/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vlmrun/hub/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.35"
2 | 


--------------------------------------------------------------------------------
/requirements/requirements.extra.txt:
--------------------------------------------------------------------------------
1 | ollama>=0.4.4
2 | 


--------------------------------------------------------------------------------
/requirements/requirements.build.txt:
--------------------------------------------------------------------------------
1 | build>=1.2.2
2 | twine>=6.0.1
3 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include vlmrun/hub/catalog.yaml
2 | include vlmrun/hub/schemas/contrib/catalog.yaml
3 | 


--------------------------------------------------------------------------------
/tests/test_imports.py:
--------------------------------------------------------------------------------
1 | def test_standalone_import():  # smoke test: passes as long as importing the hub package does not raise
2 |     from vlmrun import hub  # noqa: F401
3 | 


--------------------------------------------------------------------------------
/requirements/requirements.server.txt:
--------------------------------------------------------------------------------
1 | fastapi>=0.115.8
2 | httpx>=0.28.0
3 | uvicorn[standard]>=0.34.0
4 | 


--------------------------------------------------------------------------------
/vlmrun/hub/full-catalog.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | catalogs:
3 |   - ./catalog.yaml
4 |   - ./schemas/contrib/catalog.yaml
5 | 


--------------------------------------------------------------------------------
/.env.template:
--------------------------------------------------------------------------------
1 | OPENAI_BASE_URL=https://api.openai.com/v1
2 | OPENAI_API_KEY=
3 | GEMINI_API_KEY=
4 | VLMRUN_API_KEY=
5 | FIREWORKS_API_KEY=
6 | 


--------------------------------------------------------------------------------
/docs/RELEASE.md:
--------------------------------------------------------------------------------
 1 | # Release
 2 | 
 3 | 1. Check out the main branch
 4 | 2. Create a new tag and push it
 5 | 
 6 | ```
 7 | git tag v0.0.1
 8 | git push origin v0.0.1
 9 | ```
10 | 


--------------------------------------------------------------------------------
/requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types>=0.7.0
2 | datamodel-code-generator>=0.26.1,<=0.39.0
3 | loguru>=0.7.3
4 | Pillow>=10.2.0
5 | pydantic>=2.5,<3
6 | pydantic_core>=2.23.4
7 | pydantic_yaml>=1.2.0
8 | typing_extensions>=4.12.2
9 | 


--------------------------------------------------------------------------------
/vlmrun/hub/constants.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | VLMRUN_HUB_PATH = Path(__file__).parent  # directory that contains this constants module
4 | VLMRUN_HUB_CATALOG_PATH = VLMRUN_HUB_PATH / "catalog.yaml"  # catalog file directly under the hub directory
5 | VLMRUN_HUB_CONTRIB_CATALOG_PATH = VLMRUN_HUB_PATH / "schemas" / "contrib" / "catalog.yaml"  # contrib catalog path
6 | 


--------------------------------------------------------------------------------
/requirements/requirements.test.txt:
--------------------------------------------------------------------------------
 1 | fastapi>=0.115.8
 2 | httpx>=0.28.0
 3 | instructor>=1.7.0
 4 | openai>=1.58.1
 5 | pandas>=2.1.1
 6 | pre-commit>=4.0.1
 7 | pytest>=8.3.4
 8 | python-dotenv>=1.0.1
 9 | requests>=2.32.3
10 | ruff>=0.2.2
11 | tqdm>=4.67.1
12 | twine>=6.0.1
13 | vlmrun[doc]>=0.1.14
14 | 


--------------------------------------------------------------------------------
/vlmrun/hub/server/app.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI
 2 | 
 3 | from vlmrun.hub.server.routes import router
 4 | from vlmrun.hub.version import __version__
 5 | 
 6 | app = FastAPI(  # application object served as `vlmrun.hub.server.app:app`
 7 |     title="VLM Run Hub",
 8 |     description="API server for VLM Run Hub schema registry",
 9 |     version=__version__,  # reported version comes from vlmrun.hub.version
10 |     docs_url="/docs",  # interactive API docs are exposed at /docs
11 | )
12 | 
13 | app.include_router(router, prefix="")  # empty prefix: the hub routes are registered without a path prefix
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yaml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 |   - name: Schema Request
4 |     about: Suggest a schema or an idea
5 |     url: https://www.github.com/vlm-run/vlmrun-hub/discussions/categories/schema-request
6 |   - name: Documentation Request
7 |     about: Suggest a documentation improvement
8 |     url: https://www.github.com/vlm-run/vlmrun-hub/discussions/categories/documentation
9 | 


--------------------------------------------------------------------------------
/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | def test_dataset_samples():
2 |     from vlmrun.hub.dataset import VLMRUN_HUB_DATASET
3 | 
4 |     for domain, sample in VLMRUN_HUB_DATASET.items():
5 |         assert sample.domain == domain, "Domain must match the sample domain"
6 |         assert sample.prompt is not None, "Sample prompt must be present"
7 |         assert sample.data is not None, "Sample data must be present"
8 |         assert sample.response_model is not None, "Sample response model must be present"
9 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/media/tv_news.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class TVNews(BaseModel):  # schema describing a TV news broadcast; every field is Optional and defaults to None
 7 |     description: Optional[str] = Field(None, description="Description of the scene contents and visual elements")
 8 |     chyron: Optional[str] = Field(
 9 |         None, description="Text displayed in the lower third of the screen (chyron/news ticker)"
10 |     )
11 |     network: Optional[str] = Field(None, description="Name of the news network broadcasting the content")
12 |     reporters: Optional[List[str]] = Field(None, description="List of reporter names appearing in the news broadcast")
13 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/retail/product_catalog.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class ProductCatalog(BaseModel):  # product catalog schema; all four fields are required (`...` has no default)
 7 |     description: str = Field(
 8 |         ..., description="A 2-sentence general visual description of the product embedded as an image."
 9 |     )
10 |     category: str = Field(
11 |         ..., description="One or two-word category of the product (i.e, apparel, accessories, footwear etc)."
12 |     )
13 |     season: Literal["fall", "spring", "summer", "winter"] = Field(  # only these four values validate
14 |         ..., description="The season the product is intended for."
15 |     )
16 |     gender: Literal["men", "women", "boys", "girls"] = Field(  # only these four values validate
17 |         ..., description="Gender or audience the product is intended for."
18 |     )
19 | 


--------------------------------------------------------------------------------
/vlmrun/hub/server/README.md:
--------------------------------------------------------------------------------
 1 | ### 🌐 Server Usage
 2 | 
 3 | The hub includes a FastAPI server for easy access to schemas:
 4 | 
 5 | ```bash
 6 | # Install with server dependencies
 7 | pip install "vlmrun-hub[server]"
 8 | 
 9 | # Run the server
10 | uvicorn vlmrun.hub.server.app:app --reload
11 | ```
12 | 
13 | Access the API:
14 | ```python
15 | import requests
16 | 
17 | # Get hub info
18 | response = requests.get("http://localhost:8000/info")
19 | print(response.json())
20 | 
21 | # List all domains
22 | response = requests.get("http://localhost:8000/domains")
23 | print(response.json())
24 | 
25 | # Get the schema for a specific domain
26 | response = requests.post(
27 |     "http://localhost:8000/schema",
28 |     json={"domain": "document.invoice"}
29 | )
30 | print(response.json())
31 | ```
32 | 
33 | API documentation is available at http://localhost:8000/docs
34 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/retail/ecommerce_product_caption.py:
--------------------------------------------------------------------------------
 1 | """Schema for retail product captions."""
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class RetailEcommerceProductCaption(BaseModel):  # product caption schema; every field is required
 7 |     description: str = Field(
 8 |         ...,
 9 |         description="A 2-sentence general visual description of the product embedded as an image.",
10 |     )
11 |     rating: int = Field(  # integer constrained to the inclusive range 0..100 via ge/le
12 |         ...,
13 |         description="The visual rating or appeal of the product between 0 and 100.",
14 |         ge=0,
15 |         le=100,
16 |     )
17 |     name: str = Field(..., description="The name of the product.")
18 |     brand: str = Field(..., description="The brand of the product.")
19 |     category: str = Field(..., description="The category of the product, e.g. 'Electronics / E-readers'.")
20 |     price: str = Field(..., description="The price of the product.")  # kept as text, not parsed into a number
21 |     color: str = Field(..., description="The color of the product.")
22 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ci:
 2 |   autofix_prs: true
 3 |   autoupdate_commit_msg: '[pre-commit.ci] pre-commit autofix suggestions'
 4 | 
 5 | repos:
 6 |   - repo: https://github.com/charliermarsh/ruff-pre-commit
 7 |     rev: 'v0.5.5'
 8 |     hooks:
 9 |     - id: ruff
10 |       args: ['--fix', '--exit-non-zero-on-fix']
11 | 
12 |   - repo: https://github.com/psf/black
13 |     rev: 24.3.0
14 |     hooks:
15 |     - id: black
16 |       exclude: notebooks|^tests/test_data$
17 |       args: ['--config=./pyproject.toml']
18 | 
19 |   - repo: https://github.com/pre-commit/pre-commit-hooks
20 |     rev: v3.1.0
21 |     hooks:
22 |     - id: check-ast
23 |     - id: check-docstring-first
24 |     - id: check-json
25 |     - id: check-merge-conflict
26 |     - id: debug-statements
27 |     - id: detect-private-key
28 |     - id: end-of-file-fixer
29 |     - id: pretty-format-json
30 |     - id: trailing-whitespace
31 |     - id: check-added-large-files
32 |       args: ['--maxkb=100']
33 |     - id: requirements-txt-fixer
34 | 


--------------------------------------------------------------------------------
/makefiles/Makefile.admin.mk:
--------------------------------------------------------------------------------
 1 | VLMRUN_HUB_VERSION := $(shell python -c 'from vlmrun.hub.version import __version__; print(__version__.replace("-", "."))')
 2 | PYPI_USERNAME :=
 3 | PYPI_PASSWORD :=
 4 | 
 5 | WHL_GREP_PATTERN := .*\$(VLMRUN_HUB_VERSION).*\.whl
 6 | 
 7 | create-pypi-release-test:
 8 | 	@echo "looking for vlmrun-hub whl file..."
 9 | 	@for file in dist/*; do \
10 | 		echo "examining file: $$file"; \
11 | 		if [ -f "$$file" ] && echo "$$file" | grep -qE "$(WHL_GREP_PATTERN)"; then \
12 | 			echo "Uploading: $$file"; \
13 | 			twine upload --repository testpypi "$$file"; \
14 | 		fi; \
15 | 	done
16 | 	@echo "Upload completed"
17 | 
18 | 
19 | create-pypi-release:
20 | 	@echo "looking for vlmrun-hub whl file..."
21 | 	@for file in dist/*; do \
22 | 		echo "examining file: $$file"; \
23 | 		if [ -f "$$file" ] && echo "$$file" | grep -qE "$(WHL_GREP_PATTERN)"; then \
24 | 			echo "Uploading: $$file"; \
25 | 			twine upload "$$file"; \
26 | 		fi; \
27 | 	done
28 | 	@echo "Upload completed"
29 | 
30 | create-tag:
31 | 	git tag -a ${VLMRUN_HUB_VERSION} -m "Release ${VLMRUN_HUB_VERSION}"
32 | 	git push origin ${VLMRUN_HUB_VERSION}
33 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/media/nba_game_state.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class TeamInfo(BaseModel):  # one team's name and score; used as the element type of NBAGameState.teams
 7 |     name: Optional[str] = Field(None, description="Name of the team")
 8 |     score: Optional[int] = Field(None, description="Current score of the team")
 9 | 
10 | 
11 | class NBAGameState(BaseModel):  # NBA game state schema; every field is Optional and defaults to None
12 |     description: Optional[str] = Field(None, description="Text description of the current game state")
13 |     teams: Optional[List[TeamInfo]] = Field(None, description="List of teams playing in the game")
14 |     status: Optional[str] = Field(None, description="Current status of the game, e.g., 'in_progress', 'final'")  # free-form str, not an enum
15 |     quarter: Optional[int] = Field(None, description="Current quarter of the game (1-4, or 5+ for overtime)")  # range is documented only, not validated
16 |     clock_time: Optional[str] = Field(None, description="Time remaining in the current quarter, e.g., '9:09'")  # kept as text
17 |     shot_clock: Optional[int] = Field(None, description="Shot clock time remaining")
18 |     network: Optional[str] = Field(None, description="TV network broadcasting the game")
19 |     is_shown: Optional[bool] = Field(None, description="Whether the game is currently being shown")
20 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/india/pan_card.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class PANCard(BaseModel):
 7 |     """PAN (Permanent Account Number) Card schema for extracting information from India's tax identity document."""
 8 | 
 9 |     # Core PAN information
10 |     pan_number: str = Field(..., description="10-character alphanumeric PAN (Permanent Account Number)")  # required; length/format is not validated here
11 |     name: str = Field(..., description="Full name of the PAN card holder")  # required
12 |     father_name: Optional[str] = Field(None, description="Father's name as printed on the PAN card")
13 |     date_of_birth: Optional[str] = Field(None, description="Date of birth of the PAN card holder")  # kept as text, not parsed into a date
14 | 
15 |     # Security features and visual elements
16 |     has_photo: Optional[bool] = Field(None, description="Whether the card has a photo of the holder")
17 |     has_signature: Optional[bool] = Field(None, description="Whether the card has a signature")
18 |     has_income_tax_logo: Optional[bool] = Field(None, description="Whether the Income Tax Department logo is visible")
19 |     has_govt_of_india_text: Optional[bool] = Field(
20 |         None, description="Whether 'GOVT. OF INDIA' or similar text is visible"
21 |     )
22 | 
23 |     # Additional information
24 |     languages: List[str] = Field(  # defaults to an empty list, unlike the Optional fields above which default to None
25 |         default_factory=list,
26 |         description="Languages in which the card information is printed (e.g., Hindi, English, etc.)",
27 |     )
28 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/media/nfl_game_state.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class TeamInfo(BaseModel):  # one team's name and score; used as the element type of NFLGameState.teams
 7 |     name: Optional[str] = Field(None, description="Name of the team")
 8 |     score: Optional[int] = Field(None, description="Current score of the team")
 9 | 
10 | 
11 | class NFLGameState(BaseModel):  # NFL game state schema; every field is Optional and defaults to None
12 |     description: Optional[str] = Field(None, description="Text description of the current game state")
13 |     teams: Optional[List[TeamInfo]] = Field(None, description="List of teams playing in the game")
14 |     status: Optional[str] = Field(None, description="Current status of the game, e.g., 'in_progress', 'final'")  # free-form str, not an enum
15 |     quarter: Optional[int] = Field(None, description="Current quarter of the game (1-4, or 5 for overtime)")  # range is documented only, not validated
16 |     clock_time: Optional[str] = Field(None, description="Time remaining in the current quarter, e.g., '14:56'")  # kept as text
17 |     possession_team: Optional[str] = Field(None, description="Name of the team currently in possession")
18 |     down: Optional[str] = Field(None, description="Current down (1st, 2nd, 3rd, 4th)")  # text, unlike the integer distance/yard_line
19 |     distance: Optional[int] = Field(None, description="Yards needed for first down")
20 |     yard_line: Optional[int] = Field(None, description="Current yard line position")
21 |     network: Optional[str] = Field(None, description="TV network broadcasting the game")
22 |     is_shown: Optional[bool] = Field(None, description="Whether the game is currently being shown")
23 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/us_passport.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class USPassport(BaseModel):
 8 |     """US Passport schema for extracting information from passport documents."""
 9 | 
10 |     family_name: str = Field(..., description="Family name (surname) of the passport holder")  # required, as are all fields down to mrz_code
11 |     given_names: str = Field(..., description="Given names (first and middle names) of the passport holder")
12 |     document_id: str = Field(..., description="Passport document identification number")
13 |     expiration_date: date = Field(..., description="Expiration date of the passport")  # typed as datetime.date
14 |     date_of_birth: date = Field(..., description="Date of birth of the passport holder")  # typed as datetime.date
15 |     issue_date: date = Field(..., description="Issue date of the passport")  # typed as datetime.date
16 |     mrz_code: str = Field(..., description="Machine Readable Zone (MRZ) code from the passport")
17 | 
18 |     # Additional optional fields that might be present
19 |     nationality: Optional[str] = Field(None, description="Nationality of the passport holder")
20 |     place_of_birth: Optional[str] = Field(None, description="Place of birth of the passport holder")
21 |     sex: Optional[str] = Field(None, description="Sex of the passport holder (M/F)")  # free-form str; M/F is documented only, not enforced
22 |     authority: Optional[str] = Field(None, description="Issuing authority of the passport")
23 |     place_of_issue: Optional[str] = Field(None, description="Place where the passport was issued")
24 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.yaml:
--------------------------------------------------------------------------------
 1 | name: Documentation
 2 | description: Report an issue or suggest improvements for VLM Run Hub documentation
 3 | title: "DOC: <Please provide a clear title after the 'DOC: ' prefix>"
 4 | labels: [documentation]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       Thank you for helping improve the VLM Run Hub documentation!
11 | 
12 |       This template is specifically for:
13 |       - Reporting documentation errors or unclear content
14 |       - Suggesting new documentation topics
15 |       - Improving existing documentation
16 | 
17 |       **Helpful Resources:**
18 |       - [Contribution Guide](./docs/CONTRIBUTING.md)
19 | 
20 | - type: dropdown
21 |   id: doc_type
22 |   attributes:
23 |     label: Documentation Type
24 |     description: What type of documentation issue are you reporting?
25 |     options:
26 |       - General Improvements / Fixes
27 |       - Usage Guides / Examples
28 |       - Other
29 |   validations:
30 |     required: true
31 | 
32 | - type: textarea
33 |   attributes:
34 |     label: Issue Description and Proposed Changes
35 |     description: >
36 |       Please describe the issue with the current documentation and your suggested changes. Include the affected page or schema and the wording you propose.
37 |     placeholder: |
38 |       E.g.:
39 | 
40 |       In the CustomerInvoice schema documentation:
41 |       - The `status` field description is unclear about valid enum values
42 |       - Missing example for nested Item model
43 | 
44 |   validations:
45 |     required: true
46 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Main CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 |   workflow_call:
 8 | 
 9 | jobs:
10 |   test:
11 |     name: Test Python ${{ matrix.python-version }}
12 |     runs-on: ubuntu-latest
13 |     timeout-minutes: 20
14 |     environment: dev
15 |     strategy:
16 |       matrix:
17 |         python-version: ['3.9', '3.10', '3.11', '3.12']
18 |       fail-fast: false
19 |     defaults:
20 |       run:
21 |         shell: bash -el {0}
22 |     env:
23 |       CACHE_NUMBER: 0
24 | 
25 |     steps:
26 |       - name: Checkout git repo
27 |         uses: actions/checkout@v3
28 | 
29 |       - uses: actions/setup-python@v5
30 |         with:
31 |           python-version: ${{ matrix.python-version }}
32 | 
33 |       - uses: actions/cache@v4
34 |         with:
35 |           path: ~/.cache/pip
36 |           key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }}
37 |           restore-keys: |
38 |             pip-
39 | 
40 |       - name: Install dependencies
41 |         # Run unconditionally: actions/cache only restores ~/.cache/pip (downloads), not installed packages.
42 |         run: |
43 |           which python
44 |           python -m pip install --upgrade pip
45 |           pip install -e '.[test]'
46 | 
47 |       - name: Quality Check
48 |         uses: pre-commit/action@v3.0.1
49 |         continue-on-error: true
50 | 
51 |       - name: Run tests
52 |         env:
53 |           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
54 |         run: |
55 |           make test
56 | 


--------------------------------------------------------------------------------
/tests/test_server.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from fastapi.testclient import TestClient
 3 | 
 4 | from vlmrun.hub.server.app import app
 5 | from vlmrun.hub.version import __version__
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def client():  # fixture handing each test a TestClient bound to the hub's FastAPI app
10 |     return TestClient(app)
11 | 
12 | 
13 | def test_info(client):
14 |     response = client.get("/info")
15 |     assert response.status_code == 200
16 |     data = response.json()
17 |     assert data["version"] == __version__
18 | 
19 | 
20 | def test_list_domains(client):
21 |     response = client.get("/domains")
22 |     assert response.status_code == 200
23 |     domains = response.json()
24 |     assert isinstance(domains, list)
25 |     assert len(domains) > 0
26 |     assert all(isinstance(d["domain"], str) for d in domains)
27 | 
28 | 
29 | def test_has_domain(client):
30 |     response = client.get("/domains/document.invoice")
31 |     assert response.status_code == 200
32 |     assert response.json() is True
33 | 
34 |     response = client.get("/domains/invalid.domain")
35 |     assert response.status_code == 200
36 |     assert response.json() is False
37 | 
38 | 
39 | def test_get_schema_valid_domain(client):
40 |     response = client.post("/schema", json={"domain": "document.invoice"})
41 |     assert response.status_code == 200
42 |     data = response.json()
43 |     assert "json_schema" in data
44 |     assert "schema_version" in data
45 |     assert "schema_hash" in data
46 | 
47 | 
48 | def test_get_schema_invalid_domain(client):
49 |     response = client.post("/schema", json={"domain": "invalid.domain"})
50 |     assert response.status_code == 404
51 | 


--------------------------------------------------------------------------------
/scripts/bump_version.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from pathlib import Path
 3 | 
 4 | 
 5 | def bump_version(version_file: str, bump_type: str = "patch") -> str:
 6 |     """
 7 |     Bump the version number in the specified file.
 8 | 
 9 |     Args:
10 |         version_file: Path to the version file
11 |         bump_type: One of 'major', 'minor', or 'patch'
12 | 
13 |     Returns:
14 |         The new version string
15 |     """
16 |     version_path = Path(version_file)
17 |     content = version_path.read_text()
18 | 
19 |     # Extract current version
20 |     version_match = re.search(r'__version__ = ["\']([^"\']+)["\']', content)
21 |     if not version_match:
22 |         raise ValueError("Could not find version string")
23 | 
24 |     current_version = version_match.group(1)
25 |     major, minor, patch = map(int, current_version.split("."))
26 | 
27 |     # Bump version according to type
28 |     if bump_type == "major":
29 |         major += 1
30 |         minor = 0
31 |         patch = 0
32 |     elif bump_type == "minor":
33 |         minor += 1
34 |         patch = 0
35 |     elif bump_type == "patch":
36 |         patch += 1
37 |     else:
38 |         raise ValueError(f"Invalid bump type: {bump_type}")
39 | 
40 |     new_version = f"{major}.{minor}.{patch}"
41 | 
42 |     # Update the file
43 |     new_content = content.replace(f'__version__ = "{current_version}"', f'__version__ = "{new_version}"')
44 |     version_path.write_text(new_content)
45 | 
46 |     return new_version
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     import sys
51 | 
52 |     version_file = sys.argv[1]
53 |     bump_type = sys.argv[2] if len(sys.argv) > 2 else "patch"
54 |     new_version = bump_version(version_file, bump_type)
55 |     print(new_version)
56 | 


--------------------------------------------------------------------------------
/tests/test_vlmrun.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import pytest
 5 | import requests
 6 | from dotenv import load_dotenv
 7 | from loguru import logger
 8 | 
 9 | from vlmrun.common.image import encode_image
10 | from vlmrun.common.utils import remote_image
11 | from vlmrun.hub.schemas.document.invoice import Invoice
12 | 
13 | load_dotenv()
14 | 
15 | 
16 | VLMRUN_API_KEY = os.getenv("VLMRUN_API_KEY", None)
17 | VLMRUN_BASE_URL = os.getenv("VLMRUN_BASE_URL", None)
18 | 
19 | pytestmark = pytest.mark.skipif(not VLMRUN_API_KEY, reason="This test requires VLMRUN_API_KEY to be set")
20 | 
21 | 
22 | def test_vlmrun_invoice():  # end-to-end check: post a sample invoice image to the API and validate the reply as an Invoice
23 |     invoice_url = "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg"
24 |     invoice_image = remote_image(invoice_url)  # fetched via the vlmrun.common.utils helper
25 |     domain = "document.invoice"
26 | 
27 |     json_data = {
28 |         "file_id": invoice_url,  # the source URL doubles as the file id
29 |         "images": [encode_image(invoice_image, format="JPEG")],
30 |         "json_schema": Invoice.model_json_schema(),  # schema generated from the hub's Invoice model
31 |         "model": "vlm-1",
32 |         "domain": domain,
33 |     }
34 | 
35 |     response = requests.post(
36 |         f"{VLMRUN_BASE_URL}/v1/image/generate",
37 |         json=json_data,
38 |         headers={"Authorization": f"Bearer {VLMRUN_API_KEY}"},
39 |     )
40 |     assert response.status_code == 201, f"Response failed: {response.text}"  # this test treats 201 as success
41 |     json_response = response.json()
42 |     assert isinstance(json_response, dict), "Expected a dict response"
43 |     assert "response" in json_response, "Failed to fetch 'response' key"
44 |     logger.debug(f"Document: {json.dumps(json_response, indent=2)}")
45 | 
46 |     invoice_data = json_response["response"]
47 |     invoice = Invoice.model_validate_json(json.dumps(invoice_data))  # re-serialized so the payload is validated as JSON
48 |     logger.debug(invoice)
49 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE/schema-request.yaml:
--------------------------------------------------------------------------------
 1 | name: Schema Request
 2 | description: Request a new schema or suggest updates to an existing schema
 3 | title: "schema: New schema for `<industry>/<use_case>`"
 4 | labels: [schema]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       Thank you for contributing to the VLM Run Hub schema!
11 | 
12 |       This template is specifically for:
13 |       - Requesting new schemas
14 |       - Suggesting updates to existing schemas
15 | 
16 | - type: dropdown
17 |   id: schema_action
18 |   attributes:
19 |     label: Schema Action
20 |     description: What action are you requesting?
21 |     options:
22 |       - New Schema
23 |       - Update Existing Schema
24 |   validations:
25 |     required: true
26 | 
27 | - type: textarea
28 |   attributes:
29 |     label: Schema Motivation
30 |     description: >
31 |       Please provide a motivation for the new schema or the updates needed for an existing schema. Include the schema name and the fields to add or change.
32 |     placeholder: |
33 |       E.g.:
34 | 
35 |       New Schema Request:
36 |       - Name: CustomerInvoice
37 |       - Fields: id, status, amount, date
38 | 
39 |       Update Existing Schema:
40 |       - Schema: CustomerInvoice
41 |       - Changes: Add `due_date` field
42 | 
# `input` is the single-line text element of the form schema; there is no `url`
# element type, and optionality is expressed through `validations.required`.
- type: input
  attributes:
    label: Sample Image
    description: Link to an example image to run the schema on
    placeholder: "https://example.com/image.png"
  validations:
    required: false
49 | 
50 | - type: textarea
51 |   attributes:
52 |     label: Sample JSON Output
53 |     description: The JSON output of the structured extraction from the sample image
54 |     placeholder: |
55 |       ```json
56 |       {
57 |         "id": "123",
58 |         "status": "paid",
59 |         "amount": 100,
60 |         "date": "2024-01-01"
61 |       }
62 |       ```
63 | 


--------------------------------------------------------------------------------
/docs/catalog-spec.yaml:
--------------------------------------------------------------------------------
 1 | # Catalog Specification Guidelines
 2 | 
 3 | # Define the API version for the catalog
 4 | apiVersion: v1
 5 | 
 6 | # List of schemas to be included in the catalog
 7 | schemas:
 8 |   # Each schema entry should define a unique domain, typically hyphenated
 9 |   # The format is <input-type>.<use-case> or <industry>.<use-case> for industry-specific schemas
10 |   - domain: # Specify the domain name, e.g., document.utility-bill
11 | 
12 |     # Define the schema path using the format: vlmrun.hub.schemas.<category>.<name>.<ClassName>
13 |     # Note that this is the fully qualified path to the schema class, and contains underscores instead of hyphens
14 |     schema: # e.g., vlmrun.hub.schemas.document.utility_bill.UtilityBill
15 | 
16 |     # Provide the full prompt to use for the call into the VLM.
17 |     prompt: # e.g., "You are a detail-oriented invoice analyst..."
18 | 
19 |     # Give a brief description or motivation for the schema
20 |     description: # e.g., "Comprehensive invoice data extraction system..."
21 | 
22 |     # Provide a publicly accessible URL to sample data for testing and reference
23 |     sample_data: # e.g., "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg"
24 | 
25 |     # Metadata section for additional information
26 |     metadata:
27 |       # List the types of inputs supported by the schema
28 |       # Currently, only `image`, `video`, and `document` are supported.
29 |       supported_inputs: # e.g., ["image", "document"]
30 | 
31 |       # Add relevant tags for categorization and searchability in the catalog
32 |       tags: # e.g., ["document", "finance"]
33 | 
34 | # Note: Ensure each schema entry is complete and follows the structure above.
35 | # Use consistent naming conventions and provide meaningful descriptions.
36 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | default: help;
 2 | 
 3 | help:
 4 | 	@echo "🔥 Official VLM Run Hub"
 5 | 	@echo ""
 6 | 	@echo "Usage: make <target>"
 7 | 	@echo ""
 8 | 	@echo "Targets:"
 9 | 	@echo "  clean               Remove all build, test, coverage and Python artifacts"
10 | 	@echo "  clean-build         Remove build artifacts"
11 | 	@echo "  clean-pyc           Remove Python file artifacts"
12 | 	@echo "  clean-test          Remove test and coverage artifacts"
13 | 	@echo "  lint                Format source code automatically"
14 | 	@echo "  test                Basic testing"
15 | 	@echo "  dist                Builds source and wheel package"
16 | 	@echo ""
17 | 
18 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
19 | 
20 | clean-build: ## remove build artifacts
21 | 	rm -fr build/
22 | 	rm -fr dist/
23 | 	rm -fr .eggs/
24 | 	rm -fr site/
25 | 	find . -name '*.egg-info' -exec rm -fr {} +
26 | 	find . -name '*.egg' -exec rm -f {} +
27 | 
28 | 
29 | clean-pyc: ## remove Python file artifacts
30 | 	find . -name '*.pyc' -exec rm -f {} +
31 | 	find . -name '*.pyo' -exec rm -f {} +
32 | 	find . -name '*~' -exec rm -f {} +
33 | 	find . -name '__pycache__' -exec rm -fr {} +
34 | 
35 | clean-test: ## remove test and coverage artifacts
36 | 	rm -fr .tox/
37 | 	rm -f .coverage
38 | 	rm -fr htmlcov/
39 | 	rm -fr .pytest_cache
40 | 
41 | lint: ## Format source code automatically
42 | 	pre-commit run --all-files # Uses pyproject.toml
43 | 
44 | test: ## Basic CPU testing
45 | 	pytest -sv tests
46 | 
47 | test-benchmark: ## Benchmark testing
48 | 	pytest -sv tests -m benchmark
49 | 
50 | dist: clean ## builds source and wheel package
51 | 	python -m build --sdist --wheel
52 | 	ls -lh dist
53 | 
54 | bump-version:
55 | 	python scripts/bump_version.py vlmrun/hub/version.py patch
56 | 
57 | generate-benchmark-html:
58 | 	pytest -sv tests/test_instructor.py -m benchmark
59 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/business_card.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from pydantic import BaseModel, Field
 3 | 
 4 | 
 5 | class Address(BaseModel):
 6 |     """Address information that may be present on a business card."""
 7 |     street: Optional[str] = Field(None, description="Street address")
 8 |     city: Optional[str] = Field(None, description="City")
 9 |     state: Optional[str] = Field(None, description="State or province")
10 |     postal_code: Optional[str] = Field(None, description="Postal or ZIP code")
11 |     country: Optional[str] = Field(None, description="Country")
12 | 
13 | 
class BusinessCard(BaseModel):
    """Business card schema for extracting information from business card images or documents."""

    # --- Person ---
    name: Optional[str] = Field(
        default=None,
        description="Full name of the person on the business card",
    )
    job_title: Optional[str] = Field(
        default=None,
        description="Job title or position of the person",
    )

    # --- Company ---
    company_name: Optional[str] = Field(
        default=None,
        description="Name of the company or organization",
    )

    # --- Contact details ---
    phone: Optional[str] = Field(
        default=None,
        description="Phone number, may include country code and formatting",
    )
    email: Optional[str] = Field(default=None, description="Email address")
    website: Optional[str] = Field(default=None, description="Website URL")
    address: Optional[Address] = Field(
        default=None,
        description="Physical address information",
    )

    # --- Visual elements ---
    has_logo: Optional[bool] = Field(
        default=None,
        description="Indicates if the business card has a company logo",
    )
    has_photo: Optional[bool] = Field(
        default=None,
        description="Indicates if the business card has a photo of the person",
    )

    # --- Everything else ---
    # NOTE(review): `dict` is untyped, so the generated schema places no
    # constraint on keys or values; consider a typed mapping if the API allows.
    social_media: Optional[dict] = Field(
        default=None,
        description="Dictionary of social media handles or URLs",
    )
    additional_info: Optional[str] = Field(
        default=None,
        description="Any additional information present on the card",
    )
37 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/aerospace/remote_sensing.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
# Closed set of scene labels accepted by `RemoteSensing.categories`.
# Member names use underscores; the string values use hyphens.
# `other` is the catch-all for scenes that match none of the named labels.
class RemoteSensingCategory(str, Enum):
    airport = "airport"
    baseball_field = "baseball-field"
    beach = "beach"
    bridge = "bridge"
    cemetery = "cemetery"
    commercial_area = "commercial-area"
    dam = "dam"
    desert = "desert"
    factory = "factory"
    farmlands = "farmlands"
    forest = "forest"
    golf_course = "golf-course"
    greenhouse = "greenhouse"
    hospital = "hospital"
    industrial_area = "industrial-area"
    lake = "lake"
    landfill = "landfill"
    military_base = "military-base"
    mining_site = "mining-site"
    mountain = "mountain"
    oil_field = "oil-field"
    other = "other"
    park = "park"
    parking_lot = "parking-lot"
    port = "port"
    power_plant = "power-plant"
    quarry = "quarry"
    railway_station = "railway-station"
    residential_area = "residential-area"
    resort = "resort"
    river = "river"
    runway = "runway"
    school_campus = "school-campus"
    shopping_mall = "shopping-mall"
    solar_farm = "solar-farm"
    stadium = "stadium"
    storage_tanks = "storage-tanks"
    vineyard = "vineyard"
    water_treatment = "water-treatment"
    wetland = "wetland"
    wind_farm = "wind-farm"
49 | 
50 | 
class RemoteSensing(BaseModel):
    # Scene-level summary of a satellite image; every field is optional.
    description: Optional[str] = Field(
        default=None,
        description="2-3 sentence description of the satellite image.",
    )
    objects: Optional[List[str]] = Field(
        default=None,
        description="List of unique objects in the scene",
    )
    categories: Optional[List[RemoteSensingCategory]] = Field(
        default=None,
        description="List of categories that pertain to the scene.",
    )
    is_visible: Optional[bool] = Field(
        default=None,
        description="Whether the land mass is visible from space, or if it is obscured by clouds.",
    )
60 | 


--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to VLM Run Hub
 2 | 
 3 | We welcome contributions to the VLM Run Hub! Whether you're reporting bugs, suggesting features, or contributing code, your input is valuable to us.
 4 | 
 5 | ## Reporting Bugs and Asking Questions
 6 | 
 7 | - **GitHub Issues**: Use [GitHub Issues](https://github.com/vlm-run/vlmrun-hub/issues) to report bugs or request features.
 8 | - **Discussions**: Join our [Discord forum](https://discord.gg/4jgyECY4rq) for general questions and discussions.
 9 | 
10 | ## Contributing Code
11 | 
12 | 1. **Fork the Repository**: Start by forking the repository and cloning it to your local machine.
13 | 
14 | 2. **Set Up Your Development Environment**: Follow the instructions in the `README.md` to set up your development environment.
15 | 
16 | 3. **Create a Branch**: Create a new branch for your feature or bug fix.
17 | 
18 | 4. **Write Tests**: Ensure your code is well-tested. We use `pytest` for testing. Use `make test` to run all the tests.
19 | 
20 | 5. **Submit a Pull Request**: Once your changes are ready, submit a pull request. Make sure to follow the [Schema Guidelines](./SCHEMA-GUIDELINES.md) if your contribution involves Pydantic schema changes.
21 | 
22 | ## Schema Contributions
23 | 
24 | For contributions involving Pydantic schemas, please refer to the [Schema Guidelines](./SCHEMA-GUIDELINES.md) for detailed instructions on creating and submitting schemas.
25 | 
26 | ## Review Process
27 | 
28 | - **For Members**: Assign a reviewer to your pull request. Address any feedback and ensure all tests pass before merging.
29 | - **For Non-Members**: A project member will be assigned to review your pull request. Please address their feedback promptly.
30 | 
31 | ## PR Checklist
32 | 
33 | Before submitting your changes, complete the following steps:
34 | 
35 | - Make any relevant changes to the repository.
36 | - Run `make lint` to ensure your code is linted.
37 | - Add any relevant tests under `tests/`, and run `make test` to ensure all tests pass.
38 | - If you are contributing a new schema, follow the [Contributing Schemas](./CONTRIBUTING-SCHEMA.md) guide instead of the general contributing guidelines.
39 | 
40 | Thank you for helping us maintain high standards for schema contributions!
41 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/healthcare/medical_insurance_card.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class ProviderService(BaseModel):
 7 |     provider_service_number: Optional[str] = Field(None, description="Provider service number.")
 8 |     precertification_number: Optional[str] = Field(None, description="Precertification number.")
 9 | 
10 | 
class MemberInformation(BaseModel):
    # The member's name is the only required field in this model.
    member_name: str = Field(..., description="Name of the member.")

    # Identifiers default to None when absent.
    member_id: Optional[str] = Field(default=None, description="Member ID.")
    group_number: Optional[str] = Field(default=None, description="Group number.")
15 | 
16 | 
class PharmacyPlan(BaseModel):
    # Prescription (Rx) details; all values are kept as optional strings.
    rx_bin: Optional[str] = Field(default=None, description="Rx bin.")
    rx_pcn: Optional[str] = Field(default=None, description="Rx pcn.")
    rx_grp: Optional[str] = Field(default=None, description="Rx grp.")
    pharmacy_help_desk: Optional[str] = Field(
        default=None,
        description="Pharmacy help desk.",
    )
22 | 
23 | 
class InsuranceProvider(BaseModel):
    # Insurer name and network; both optional.
    provider_name: Optional[str] = Field(default=None, description="Provider name.")
    network: Optional[str] = Field(default=None, description="Network.")
27 | 
28 | 
class Coverage(BaseModel):
    # One optional free-text entry per service category.
    office_visit: Optional[str] = Field(default=None, description="Office visit.")
    specialist_visit: Optional[str] = Field(default=None, description="Specialist visit.")
    urgent_care: Optional[str] = Field(default=None, description="Urgent care.")
    emergency_room: Optional[str] = Field(default=None, description="Emergency room.")
    inpatient_hospital: Optional[str] = Field(default=None, description="Inpatient hospital.")
35 | 
36 | 
class MedicalInsuranceCard(BaseModel):
    # Top-level card model: each section is an optional nested model, so a card
    # missing a whole section still validates.
    provider_service: Optional[ProviderService] = Field(
        default=None,
        description="Provider service information.",
    )
    member_information: Optional[MemberInformation] = Field(
        default=None,
        description="Member information.",
    )
    pharmacy_plan: Optional[PharmacyPlan] = Field(
        default=None,
        description="Pharmacy plan information.",
    )
    insurance_provider: Optional[InsuranceProvider] = Field(
        default=None,
        description="Insurance provider information.",
    )
    coverage: Optional[Coverage] = Field(
        default=None,
        description="Coverage information.",
    )
43 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/us_drivers_license.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
# Single-letter gender codes accepted by `USDriversLicense.gender`.
class Gender(str, Enum):
    MALE = "M"
    FEMALE = "F"
    OTHER = "X"
11 | 
12 | 
# Single-letter license classes accepted by `USDriversLicense.license_class`.
# Any other letter is rejected by validation.
class LicenseClass(str, Enum):
    A = "A"
    B = "B"
    C = "C"
    D = "D"
    E = "E"
    F = "F"
    G = "G"
    M = "M"
22 | 
23 | 
# Address of the license holder; all four parts are required strings.
class Address(BaseModel):
    street: str = Field(..., description="Street address")
    city: str = Field(..., description="City")
    state: str = Field(..., description="Two-letter state code")
    zip_code: str = Field(..., description="ZIP code")
29 | 
30 | 
class USDriversLicense(BaseModel):
    # --- Issuer and license identifier (required) ---
    issuing_state: str = Field(..., description="Two-letter code of the issuing state")
    license_number: str = Field(..., description="Driver's license number")

    # --- Holder name: the full name is required, its parts are optional ---
    full_name: str = Field(..., description="Full name of the license holder")
    first_name: Optional[str] = Field(
        default=None,
        description="First name of the license holder",
    )
    middle_name: Optional[str] = Field(
        default=None,
        description="Middle name of the license holder",
    )
    last_name: Optional[str] = Field(
        default=None,
        description="Last name of the license holder",
    )

    address: Address = Field(..., description="Address of the license holder")

    # --- Personal data (dates are kept as plain strings) ---
    date_of_birth: str = Field(..., description="Date of birth")
    gender: Gender = Field(..., description="Gender of the license holder")

    # --- Physical description (optional) ---
    height: Optional[str] = Field(
        default=None,
        description="Height of the license holder in the format X'Y\" (e.g. 5'7\")",
    )
    weight: Optional[float] = Field(
        default=None,
        description="Weight (in lbs) of the license holder (e.g. 150.5 lbs)",
    )
    eye_color: Optional[str] = Field(
        default=None,
        description="Eye color code of the license holder",
    )
    hair_color: Optional[str] = Field(
        default=None,
        description="Hair color code of the license holder",
    )

    # --- Validity window (required) ---
    issue_date: str = Field(..., description="Date the license was issued")
    expiration_date: str = Field(..., description="Expiration date of the license")

    license_class: LicenseClass = Field(..., description="Class of the driver's license")

    # --- Optional indicators ---
    donor: Optional[bool] = Field(
        default=None,
        description="Indicates if the holder is an organ donor",
    )
    veteran: Optional[bool] = Field(
        default=None,
        description="Indicates if the holder is a veteran",
    )
57 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/social/twitter_card.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import List, Literal, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
class User(BaseModel):
    """Twitter user information."""

    # Both fields are required: a `User` cannot be built without them.
    username: str = Field(..., description="The Twitter handle of the user.")
    display_name: str = Field(..., description="The display name of the user.")
12 | 
13 | 
class Media(BaseModel):
    """Media content attached to a tweet."""

    description: Optional[str] = Field(
        default=None,
        description="A description of the media content linked.",
    )
    # Restricted to the three literals below; any other value fails validation.
    type: Optional[Literal["image", "video", "url"]] = Field(
        default=None,
        description="The type of media (image, video, etc.).",
    )
21 | 
22 | 
class Tweet(BaseModel):
    """Individual tweet information including content and engagement metrics."""

    content: Optional[str] = Field(default=None, description="The text content of the tweet.")
    # The field is a calendar `date`, not a datetime. The description says so
    # explicitly so the model is not prompted to emit a time-of-day component,
    # which date validation rejects.
    created_at: Optional[date] = Field(
        default=None, description="The date when the tweet was created (YYYY-MM-DD)."
    )
    user: Optional[User] = Field(default=None, description="The user who posted the tweet.")
    media: Optional[List[Media]] = Field(default=None, description="List of media items attached to the tweet, if any.")

    # Engagement counters are approximate and optional.
    retweet_count: Optional[int] = Field(
        default=None, description="The approximate number of times this tweet has been retweeted."
    )
    like_count: Optional[int] = Field(
        default=None, description="The approximate number of likes this tweet has received (icon is a heart)."
    )
    reply_count: Optional[int] = Field(
        default=None, description="The approximate number of replies to this tweet (icon is a reply arrow)."
    )
    view_count: Optional[int] = Field(
        default=None,
        description="The approximate number of views this tweet has received (icon is a vertical bar chart).",
    )
    quote_count: Optional[int] = Field(
        default=None, description="The approximate number of times this tweet has been quoted."
    )
46 | 
47 | 
class TwitterCard(BaseModel):
    """A Twitter card containing tweet information and any quoted tweets."""

    # The main tweet is required; the quoted tweet reuses the same model.
    tweet: Tweet = Field(
        ...,
        description="The main tweet content and metadata.",
    )
    quoted_tweet: Optional[Tweet] = Field(
        default=None,
        description="A tweet that is quoted by the main tweet, if any.",
    )
53 | 


--------------------------------------------------------------------------------
/tests/test_ollama.py:
--------------------------------------------------------------------------------
 1 | import importlib.util
 2 | import os
 3 | from typing import Type
 4 | 
 5 | import pytest
 6 | import requests
 7 | from conftest import BenchmarkResult, create_benchmark
 8 | from dotenv import load_dotenv
 9 | from loguru import logger
10 | from pydantic import BaseModel
11 | 
12 | from vlmrun.hub.dataset import VLMRUN_HUB_DATASET
13 | 
# Load variables from a local .env file (if any) before reading the environment below.
load_dotenv()

# Base URL of the Ollama server probed by the test; defaults to http://localhost:11434.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
17 | 
18 | 
@pytest.mark.benchmark
@pytest.mark.skipif(not importlib.util.find_spec("ollama"), reason="Ollama is not installed")
def test_local_ollama():
    """Benchmark every hub sample against a local Ollama vision model.

    Each sample's prompt and images are sent to the model with the sample's
    JSON schema as the output format. Responses are validated against the
    schema, and the outcome (including failures) is written to a benchmark
    report. The test is skipped when the Ollama server cannot be reached.
    """
    from ollama import chat

    from vlmrun.common.image import encode_image

    try:
        # Bound the probe so an unresponsive host skips the test instead of hanging it.
        requests.get(f"{OLLAMA_BASE_URL}/api/version", timeout=5)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        pytest.skip("Ollama server is not running")

    results = []
    model = "bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh"  # "llama3.2-vision:11b",
    for sample in VLMRUN_HUB_DATASET.values():
        response_model: Type[BaseModel] = sample.response_model
        # Validated model instance; stays None when generation or validation fails.
        response = None
        try:
            chat_response = chat(
                model=model,
                format=response_model.model_json_schema(),  # Pass in the schema for the response
                messages=[
                    {
                        "role": "user",
                        "content": sample.prompt,
                        # Keep only the part after the first comma of the encoded image string.
                        "images": [encode_image(img, format="JPEG").split(",")[1] for img in sample.images],
                    },
                ],
                options={"temperature": 0},  # Set temperature to 0 for more deterministic output
            )
            response = response_model.model_validate_json(chat_response.message.content)
        except Exception as e:
            # Deliberately broad: one failing sample must not abort the whole
            # benchmark, so the failure is logged and recorded as an empty result.
            logger.error(f"Error processing sample {sample.domain}: {e}")

        results.append(
            BenchmarkResult(
                domain=sample.domain,
                sample=sample.data,
                response_model=response_model.__name__,
                response_json=(
                    response.model_dump_json(indent=2, exclude_none=False) if response is not None else None
                ),
            )
        )
        if response is not None:
            logger.debug(response.model_dump_json(indent=2))

    create_benchmark(results, model, suffix="ollama")
65 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/bank_check.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class Address(BaseModel):
 8 |     street: Optional[str] = Field(None, description="Street address")
 9 |     city: Optional[str] = Field(None, description="City")
10 |     state: Optional[str] = Field(None, description="State")
11 |     postal_code: Optional[str] = Field(None, description="Postal code")
12 |     country: Optional[str] = Field(None, description="Country")
13 | 
14 | 
class BankInformation(BaseModel):
    # Bank details; the numbers are strings, not integers.
    name: Optional[str] = Field(default=None, description="Name of the bank")
    address: Optional[Address] = Field(default=None, description="Address of the bank")
    routing_number: Optional[str] = Field(default=None, description="Bank routing number")
    account_number: Optional[str] = Field(default=None, description="Bank account number")
20 | 
21 | 
class BankCheck(BaseModel):
    # --- Check identification ---
    check_number: Optional[str] = Field(
        default=None,
        description="Check number, typically printed in the top right corner of the check",
    )
    # NOTE(review): stored as a free-form string even though this module imports `date`.
    payment_date: Optional[str] = Field(
        default=None,
        description="Date written on the check",
    )
    payee: Optional[str] = Field(
        default=None,
        description="Name of the person or entity to whom the check is payable (Pay to the order of)",
    )

    # --- Amount, in both printed forms ---
    amount_numeric: Optional[float] = Field(
        default=None,
        description="Amount of the check in numeric form",
    )
    amount_text: Optional[str] = Field(
        default=None,
        description="Amount of the check written out in words",
    )

    # --- Bank and drawer ---
    bank_info: Optional[BankInformation] = Field(
        default=None,
        description="Information about the bank issuing the check",
    )
    drawer_name: Optional[str] = Field(
        default=None,
        description="Name of the person writing the check (drawer)",
    )
    drawer_address: Optional[Address] = Field(
        default=None,
        description="Address of the person writing the check",
    )
    drawer_signature: Optional[bool] = Field(
        default=None,
        description="Whether the check is signed by the drawer",
    )

    # --- Remaining printed content and status flags ---
    memo: Optional[str] = Field(
        default=None,
        description="Memo or note written on the check",
    )
    micr_line: Optional[str] = Field(
        default=None,
        description="MICR (Magnetic Ink Character Recognition) line at the bottom of the check containing routing and account numbers",
    )
    is_void: Optional[bool] = Field(
        default=None,
        description="Whether the check is marked as void",
    )
    is_post_dated: Optional[bool] = Field(
        default=None,
        description="Whether the check is post-dated (date is in the future)",
    )
    currency: Optional[str] = Field(
        default=None,
        description="Currency of the check",
    )
37 | 


--------------------------------------------------------------------------------
/docs/CONTRIBUTING-SCHEMA.md:
--------------------------------------------------------------------------------
 1 | # Contributing Schemas to VLM Run Hub
 2 | 
 3 | Thank you for your interest in contributing schemas to the VLM Run Hub! To ensure consistency and quality, please follow these guidelines.
 4 | 
 5 | ## Guidelines for Writing a Schema
 6 | 
 7 | Please refer to the [Schema Guidelines](./SCHEMA-GUIDELINES.md) for comprehensive instructions on creating schemas. Key points include:
 8 | 
 9 | - **Use Pydantic’s BaseModel**: All schemas should inherit from Pydantic’s `BaseModel`.
10 | - **Strongly-Typed Fields**: Ensure all fields are strongly-typed with precise annotations.
11 | - **Field Metadata**: Include descriptions and constraints for each field.
12 | - **Examples**: Provide example data using `model_config = ConfigDict(json_schema_extra=...)` (Pydantic v2; this replaces v1's `Config.schema_extra`).
13 | 
14 | ## Adding a New Schema
15 | 
16 | 1. **Define domain**: Create a new domain for your schema. Check out the [Catalog](../vlmrun/hub/catalog.yaml) for existing domains, e.g. `document.invoice`, `document.receipt`, `accounting.form-payslip`, `healthcare.pathology-report`, `real-estate.lease-agreement`. If the document is a form, the second part of the domain should be `form-<form-name>`.
17 | 2. **Create a New Schema File**: Place your schema in `schemas/contrib/<industry>/<use_case>.py`, following the appropriate industry and use case structure defined in the [Schema Guidelines](./SCHEMA-GUIDELINES.md).
18 | 
19 | 3. **Add Tests**: Include tests for your schema in `tests/test_schemas.py`.
20 | 
21 | 4. **Add to the contrib Catalog**: Add your schema to the [`vlmrun/hub/schemas/contrib/catalog.yaml`](../vlmrun/hub/schemas/contrib/catalog.yaml) file in the `schemas` section, and test it with `pytest -sv tests/test_instructor.py --domain="<domain_name>"`.
22 | 
23 | 5. **Submit a Pull Request**: Once your schema is complete and tested, submit a pull request with the [`schema-request`](../.github/PULL_REQUEST_TEMPLATE/schema-request.yaml) template for review. You can take a look at a previous PR for reference.
24 | 
25 | ## PR Checklist
26 | 
27 | Before submitting your schema, complete the following steps:
28 | 
29 | - Follow the [Schema Review Checklist](./SCHEMA-GUIDELINES.md#✅-schema-review-checklist)
30 | - Add the schema to the [`vlmrun/hub/schemas/contrib/catalog.yaml`](../vlmrun/hub/schemas/contrib/catalog.yaml) file, following the [Adding a New Schema to the Hub](./SCHEMA-GUIDELINES.md#👩‍💻-adding-a-new-schema-to-the-hub) section
31 | - Make sure the sample image is publicly accessible.
32 | - Test the schema with `pytest -sv tests/test_instructor.py --domain="<domain_name>"`.
33 | 
34 | Thank you for helping us maintain high standards for schema contributions!
35 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | from datetime import datetime
 3 | from pathlib import Path
 4 | 
 5 | import pytest
 6 | from loguru import logger
 7 | 
 8 | 
 9 | def pytest_addoption(parser):
10 |     parser.addoption("--domain", action="store", default="document.us-drivers-license", help="Domain to test")
11 |     parser.addoption("--provider", action="store", default="openai", help="Provider to use for testing")
12 |     parser.addoption("--model", action="store", default="gpt-4o-mini-2024-07-18", help="Model to use for testing")
13 | 
14 | 
@pytest.fixture
def domain_arg(request):
    """Return the value of the `--domain` command-line option."""
    config = request.config
    return config.getoption("--domain")
19 | 
20 | 
@pytest.fixture
def provider_arg(request):
    """Return the value of the `--provider` command-line option."""
    config = request.config
    return config.getoption("--provider")
25 | 
26 | 
@pytest.fixture
def model_arg(request):
    """Return the value of the `--model` command-line option."""
    config = request.config
    return config.getoption("--model")
31 | 
32 | 
# One row of a benchmark report. `create_benchmark` renders `sample` as an
# <img> source and shows "❌" when `response_json` is None.
BenchmarkResult = namedtuple("BenchmarkResult", ["domain", "sample", "response_model", "response_json"])
34 | 
35 | 
def create_benchmark(results: list[BenchmarkResult], model: str, suffix: str):
    """Write benchmark results to a dated markdown report.

    The report is an HTML table with one row per result. It is written to
    `<date>-<model>-<suffix>-results.md` in a `benchmarks` directory next to
    this file, which is created if missing.

    Args:
        results: Rows to render. A `response_json` of None is shown as "❌".
        model: Model name, used in the heading and the file name.
        suffix: Extra tag appended to the file name.
    """
    BENCHMARK_DIR = Path(__file__).parent / "benchmarks"
    BENCHMARK_DIR.mkdir(parents=True, exist_ok=True)
    date_str = datetime.now().strftime("%Y-%m-%d")
    # Model names may contain "/" (e.g. "org/model"), which would otherwise be
    # read as a sub-directory.
    file_name = f"{date_str}-{model}-{suffix}-results.md".replace("/", "-")
    benchmark_path = BENCHMARK_DIR / file_name

    def parse_json(x):
        # Keep multi-line JSON inside a single table cell.
        return x.replace("\n", "<br>") if x is not None else "❌"

    # Collect fragments and join once instead of growing a string with `+=`.
    parts = [
        f"## Benchmark Results (model={model}, date={date_str})\n\n",
        """<table>
<tr>
<td style='width: 5%;'> Domain </td>
<td style='width: 5%;'> Response Model </td>
<td style='width: 40%;'> Sample </td>
<td style='width: 50%;'> Response JSON </td>
</tr>
    """,
    ]
    for result in results:
        parts.append("<tr>")
        parts.append(f"<td> <kbd>{result.domain}</kbd> </td>\n")
        parts.append(f"<td> <kbd>{result.response_model}</kbd> </td>\n")
        parts.append(f"<td> <img src='{result.sample}' width='100%' /> </td>\n")
        parts.append(f"<td> <pre>{parse_json(result.response_json)}</pre> </td>\n")
        parts.append("</tr>")
    parts.append("\n</table>")

    # Explicit UTF-8: the report can contain "❌" and non-ASCII model output,
    # which a non-UTF-8 platform default encoding cannot encode.
    with benchmark_path.open("w", encoding="utf-8") as f:
        f.write("".join(parts))
    logger.debug(f"Results written to {benchmark_path}")
69 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/india/aadhaar_card.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class Address(BaseModel):
 8 |     """Address structure found on Aadhaar PVC cards."""
 9 | 
10 |     full_address: str = Field(..., description="Complete address as printed on the card")
11 |     pin_code: Optional[str] = Field(None, description="PIN code (postal code) extracted from the address")
12 |     state: Optional[str] = Field(None, description="State extracted from the address")
13 |     district: Optional[str] = Field(None, description="District extracted from the address")
14 | 
15 | 
# Which face(s) of the card are visible; used as the type of `AadhaarCard.detected_side`.
class CardSide(Enum):
    FRONT = "front"
    BACK = "back"
    BOTH = "both"
    UNKNOWN = "unknown"
21 | 
22 | 
class AadhaarCard(BaseModel):
    """Aadhaar PVC Card schema for extracting information from India's national identity document."""

    # Extraction metadata (the only required field).
    detected_side: CardSide = Field(
        ...,
        description="Which side of the Aadhaar card is visible in the image (front/back/both/unknown)",
    )

    # Details described as front-side information; all default to None.
    aadhaar_number: Optional[str] = Field(
        default=None,
        description="12-digit unique Aadhaar identification number (may be partially masked)",
    )
    name: Optional[str] = Field(default=None, description="Full name of the Aadhaar card holder")
    date_of_birth: Optional[str] = Field(default=None, description="Date of birth of the Aadhaar card holder")
    gender: Optional[str] = Field(
        default=None,
        description="Gender of the Aadhaar card holder (Male/Female/Transgender)",
    )

    # Details described as back-side information.
    address: Optional[Address] = Field(
        default=None,
        description="Address details as printed on the back of the card",
    )

    # Presence flags for visual / security elements.
    has_photo: Optional[bool] = Field(
        default=None,
        description="Whether the card has a photo of the holder (front side)",
    )
    has_qr_code: Optional[bool] = Field(default=None, description="Whether the card has a QR code")
    has_emblem: Optional[bool] = Field(
        default=None,
        description="Whether the card has the Government of India emblem",
    )
    has_uidai_logo: Optional[bool] = Field(default=None, description="Whether the UIDAI logo is visible")

    # Dates kept as free-form strings.
    issue_date: Optional[str] = Field(default=None, description="Date of issue if visible on the card")
    print_date: Optional[str] = Field(default=None, description="Date when the PVC card was printed, if visible")

    # Languages default to an empty list rather than None.
    languages: List[str] = Field(
        default_factory=list,
        description="Languages in which the card information is printed (e.g., Hindi, English, etc.)",
    )
57 | 


--------------------------------------------------------------------------------
/vlmrun/hub/server/routes.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import json
 3 | from typing import List, Optional
 4 | 
 5 | from fastapi import APIRouter, HTTPException
 6 | from pydantic import BaseModel, Field
 7 | 
 8 | from vlmrun.hub.registry import registry
 9 | from vlmrun.hub.version import __version__
10 | 
# Router on which the hub endpoints below are registered via @router.get / @router.post.
router = APIRouter()
12 | 
13 | 
# Response body of GET /info.
class HubInfoResponse(BaseModel):
    version: str = Field(..., description="The version of the hub")
16 | 
17 | 
class HubDomainInfo(BaseModel):
    # Element type of the GET /domains response; only `domain` is required.
    domain: str = Field(..., description="The domain identifier")

    # Optional metadata about the domain; each defaults to None.
    description: Optional[str] = Field(default=None, description="Description of the schema's purpose")
    supported_inputs: Optional[List[str]] = Field(default=None, description="List of supported input types")
    tags: Optional[List[str]] = Field(default=None, description="List of tags for the schema")
    sample_data: Optional[List[str]] = Field(default=None, description="URLs to sample data")
24 | 
25 | 
# Request body of POST /schema.
class HubSchemaRequest(BaseModel):
    domain: str = Field(..., description="The domain to get the schema for")
28 | 
29 | 
# Response body of POST /schema: the JSON schema plus the version and hash that identify it.
class HubSchemaResponse(BaseModel):
    json_schema: dict = Field(..., description="The JSON schema for the domain")
    schema_version: str = Field(..., description="The specific version of the schema")
    schema_hash: str = Field(..., description="The first 8 characters of the sha256 hash")
34 | 
35 | 
@router.get("/info", response_model=HubInfoResponse)
def info() -> HubInfoResponse:
    """Get hub version information."""
    # Report the version string imported from vlmrun.hub.version.
    hub_info = HubInfoResponse(version=__version__)
    return hub_info
40 | 
41 | 
@router.get("/domains", response_model=List[HubDomainInfo])
def list_domains() -> List[HubDomainInfo]:
    """List available domains."""
    domain_infos: List[HubDomainInfo] = []
    for name in registry.list_schemas():
        # The registry's per-domain info is expanded into the response model's keyword fields.
        details = registry.get_domain_info(name)
        domain_infos.append(HubDomainInfo(domain=name, **details))
    return domain_infos
46 | 
47 | 
@router.get("/domains/{domain}", response_model=bool)
def has_domain(domain: str) -> bool:
    """Check if domain exists."""
    # Membership is delegated to the registry's `in` support.
    is_registered = domain in registry
    return is_registered
52 | 
53 | 
@router.post("/schema", response_model=HubSchemaResponse)
async def get_domain_schema(request: HubSchemaRequest) -> HubSchemaResponse:
    """Get schema for domain."""
    try:
        model_cls = registry[request.domain]
        schema = model_cls.model_json_schema()

        # Serialize with sorted keys so the digest does not depend on dict ordering,
        # then keep only the first 8 hex characters of the SHA-256 digest.
        canonical = json.dumps(schema, sort_keys=True)
        digest = hashlib.sha256(canonical.encode()).hexdigest()

        return HubSchemaResponse(
            json_schema=schema,
            schema_version=__version__,
            schema_hash=digest[:8],
        )
    except KeyError:
        # A KeyError raised anywhere in the block above is reported as "not found".
        raise HTTPException(status_code=404, detail=f"Schema '{request.domain}' not found")
    except Exception as e:
        # Any other failure is surfaced as a 500 carrying the exception text.
        raise HTTPException(status_code=500, detail=str(e))
71 | 


--------------------------------------------------------------------------------
/vlmrun/hub/dataset.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from pathlib import Path
 3 | from typing import List, Type, Union
 4 | 
 5 | from PIL import Image
 6 | from pydantic import BaseModel
 7 | from typing_extensions import TypeAlias
 8 | 
 9 | from vlmrun.common.pdf import pdf_images
10 | from vlmrun.common.utils import download_artifact, remote_image
11 | from vlmrun.hub.constants import (
12 |     VLMRUN_HUB_CATALOG_PATH,
13 |     VLMRUN_HUB_CONTRIB_CATALOG_PATH,
14 | )
15 | from vlmrun.hub.registry import SchemaCatalogYaml
16 | 
# Aliases for the inputs accepted by HubSample's image / PDF loaders: a string or a Path.
ImageType: TypeAlias = Union[str, Path]
PDFType: TypeAlias = Union[str, Path]
19 | 
20 | 
@dataclass
class HubSample:
    """A hub sample: a domain, its response model, a prompt and the sample data location."""

    domain: str
    """The domain / identifier of the sample"""
    response_model: Type[BaseModel]
    """The response model to use for the sample"""
    prompt: str
    """The prompt to use for the sample"""
    data: str
    """The images or image URLs associated with the sample"""

    def _handle_image(self, image: ImageType) -> Image.Image:
        """Load one image from a URL string, a local path string or a ``Path``.

        Raises:
            ValueError: If ``image`` is neither a ``str`` nor a ``Path``.
        """
        if isinstance(image, Path):
            # ImageType allows Path, so local Path objects are opened directly.
            return Image.open(image)
        if isinstance(image, str):
            if image.startswith("http"):
                return remote_image(image)
            return Image.open(image)
        raise ValueError(f"Invalid image type: {type(image)}")

    def _handle_pdf(self, url: PDFType) -> List[Image.Image]:
        """Return the page images of a PDF given as a URL string, path string or ``Path``.

        Raises:
            ValueError: If ``url`` is not a ``str``/``Path`` or does not end in ``.pdf``.
        """
        if not isinstance(url, (str, Path)):
            raise ValueError(f"Invalid PDF type: {type(url)}")
        location = str(url)
        if not location.lower().endswith(".pdf"):
            raise ValueError(f"Invalid PDF extension: {location}")
        # Only plain strings are treated as remote URLs; a Path is always a local file.
        if isinstance(url, str) and url.startswith("http"):
            path = download_artifact(url, format="file")
        else:
            path = Path(location)
        return [p.image for p in pdf_images(path, dpi=72)]

    def _handle_url(self, url: str) -> List[Image.Image]:
        """Dispatch on the (case-insensitive) file extension of ``url``.

        Raises:
            ValueError: If the extension is not ``.pdf``, ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``.
        """
        lowered = str(url).lower()
        if lowered.endswith(".pdf"):
            return self._handle_pdf(url)
        if lowered.endswith((".jpg", ".jpeg", ".png", ".webp")):
            return [self._handle_image(url)]
        raise ValueError(f"Invalid data extension: {url}")

    @property
    def images(self) -> List[Image.Image]:
        """Images loaded from ``self.data`` (one per PDF page, or a single image)."""
        return self._handle_url(self.data)
61 | 
62 | 
catalog = SchemaCatalogYaml.from_yaml(VLMRUN_HUB_CATALOG_PATH)

# Domain -> HubSample for every schema in the main catalog followed by the contrib
# catalog. The contrib mapping is unpacked last, so it wins on a duplicate domain.
VLMRUN_HUB_DATASET = {
    **{
        entry.domain: HubSample(
            domain=entry.domain,
            response_model=entry.schema_class,
            prompt=entry.prompt,
            data=entry.sample_data,
        )
        for entry in catalog.schemas
    },
    **{
        entry.domain: HubSample(
            domain=entry.domain,
            response_model=entry.schema_class,
            prompt=entry.prompt,
            data=entry.sample_data,
        )
        for entry in SchemaCatalogYaml.from_yaml(VLMRUN_HUB_CONTRIB_CATALOG_PATH).schemas
    },
}
81 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/accounting/form_payslip.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class Address(BaseModel):
 8 |     street: Optional[str] = Field(None, description="Street address including building number")
 9 |     city: Optional[str] = Field(None, description="City name")
10 |     state: Optional[str] = Field(None, description="State or province")
11 |     zip_code: Optional[str] = Field(None, description="ZIP or postal code")
12 | 
13 | 
class EmployerInfo(BaseModel):
    # Employer name and address; both optional.
    name: Optional[str] = Field(default=None, description="Name of the employer or company")
    address: Optional[Address] = Field(default=None, description="Employer's address")
17 | 
18 | 
class EmployeeInfo(BaseModel):
    # Employee details; every field is optional and defaults to None.
    name: Optional[str] = Field(default=None, description="Full name of the employee")
    employee_id: Optional[str] = Field(default=None, description="Employee identification number")
    department: Optional[str] = Field(default=None, description="Employee's department")
    position: Optional[str] = Field(default=None, description="Employee's job title or position")
    date_of_joining: Optional[date] = Field(
        default=None,
        description="Date when employee joined the company",
    )
25 | 
26 | 
class PayPeriod(BaseModel):
    # The period is kept as free text; days worked is an integer count.
    period: Optional[str] = Field(default=None, description="Pay period (e.g., 'August 2021')")
    days_worked: Optional[int] = Field(default=None, description="Number of days worked in the period")
30 | 
31 | 
class EarningsItem(BaseModel):
    # A single earnings line: the description is required, the amount may be absent.
    description: str = Field(..., description="Description of the earnings (e.g., 'Basic', 'Incentive Pay')")
    amount: Optional[float] = Field(default=None, description="Amount for this earnings type")
35 | 
36 | 
class DeductionItem(BaseModel):
    # A single deduction line: the description is required, the amount may be absent.
    description: str = Field(..., description="Description of the deduction (e.g., 'Provident Fund', 'Tax')")
    amount: Optional[float] = Field(default=None, description="Amount deducted")
40 | 
41 | 
class Payslip(BaseModel):
    # Parties and period.
    employer: Optional[EmployerInfo] = Field(default=None, description="Information about the employer")
    employee: Optional[EmployeeInfo] = Field(default=None, description="Information about the employee")
    pay_period: Optional[PayPeriod] = Field(default=None, description="Pay period details")

    # Itemized lines.
    earnings: Optional[List[EarningsItem]] = Field(default=None, description="List of earnings items")
    deductions: Optional[List[DeductionItem]] = Field(default=None, description="List of deduction items")

    # Totals and how the net amount is expressed.
    total_earnings: Optional[float] = Field(default=None, description="Total earnings amount")
    total_deductions: Optional[float] = Field(default=None, description="Total deductions amount")
    net_pay: Optional[float] = Field(default=None, description="Net pay amount after all deductions")
    currency: Optional[str] = Field(default=None, description="Currency code (e.g., 'USD', 'EUR', 'INR')")
    net_pay_in_words: Optional[str] = Field(default=None, description="Net pay amount expressed in words")
53 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | from datetime import date, datetime, time, timedelta
 2 | 
 3 | import pytest
 4 | from loguru import logger
 5 | from pydantic import BaseModel
 6 | 
 7 | from vlmrun.hub.registry import registry
 8 | from vlmrun.hub.utils import jsonschema_to_model, patch_response_format
 9 | 
10 | 
def test_patch_response_format():
    """Temporal field annotations become ``str`` and the patched model accepts string values."""

    class OriginalModel(BaseModel):
        date_field: date
        datetime_field: datetime
        time_field: time
        timedelta_field: timedelta

    PatchedModel = patch_response_format(OriginalModel)

    # Field name -> string value used to instantiate the patched model.
    string_inputs = {
        "date_field": "2023-01-01",
        "datetime_field": "2023-01-01T12:00:00",
        "time_field": "12:00:00",
        "timedelta_field": "1 day, 0:00:00",
    }

    # Every temporal annotation must have been replaced by plain `str`.
    for field_name in string_inputs:
        assert PatchedModel.model_fields[field_name].annotation is str

    # The patched model is built from strings and returns them unchanged.
    instance = PatchedModel(**string_inputs)
    for field_name, expected in string_inputs.items():
        assert getattr(instance, field_name) == expected
40 | 
41 | 
def test_patch_response_format_models():
    """Patching every response model in the hub dataset yields a ``BaseModel`` subclass."""
    from typing import Type

    from vlmrun.hub.dataset import VLMRUN_HUB_DATASET

    for hub_sample in VLMRUN_HUB_DATASET.values():
        response_model: Type[BaseModel] = hub_sample.response_model
        logger.debug(f"Patching model {response_model.__name__}")
        patched = patch_response_format(response_model)
        assert issubclass(patched, BaseModel)
52 | 
53 | 
def test_jsonschema_to_model_with_registry_schemas():
    """Test that jsonschema_to_model works with all schemas in the registry."""
    registry.load_schemas()

    for domain, schema_class in registry.schemas.items():
        json_schema = schema_class.model_json_schema()

        # Guard only the conversion. The field comparison below is kept outside the
        # `try` so an assertion failure is reported as a field mismatch instead of
        # being caught and re-labelled as a schema-processing failure.
        try:
            generated_model = jsonschema_to_model(json_schema)
        except Exception as e:
            pytest.fail(f"Failed to process schema for {domain}: {str(e)}")

        original_fields = set(schema_class.model_fields.keys())
        generated_fields = set(generated_model.model_fields.keys())

        assert original_fields == generated_fields, (
            f"Field mismatch for {domain}:\n"
            f"Original fields: {original_fields}\n"
            f"Generated fields: {generated_fields}\n"
            f"Missing: {original_fields - generated_fields}\n"
            f"Extra: {generated_fields - original_fields}"
        )
77 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "vlmrun-hub"
 7 | description = "VLM Run Hub for various industry-specific schemas"
 8 | authors = [{name = "VLM Support", email = "support@vlm.com"}]
 9 | readme = "README.md"
10 | requires-python = ">=3.9"
11 | classifiers = [
12 |     "Development Status :: 4 - Beta",
13 |     "Programming Language :: Python",
14 |     "Environment :: Console",
15 |     "Intended Audience :: Developers",
16 |     "Intended Audience :: Education",
17 |     "Intended Audience :: Information Technology",
18 |     "Intended Audience :: Science/Research","Topic :: Software Development :: Libraries",
19 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
20 |     "Topic :: Scientific/Engineering :: Image Processing",
21 |     "License :: OSI Approved :: Apache Software License",
22 |     "Programming Language :: Python :: 3 :: Only",
23 |     "Programming Language :: Python :: 3.9",
24 |     "Programming Language :: Python :: 3.10",
25 |     "Programming Language :: Python :: 3.11",
26 | ]
27 | license = {text = "Apache-2.0"}
28 | dynamic = ["version", "dependencies", "optional-dependencies"]
29 | 
30 | [project.urls]
31 | homepage = "https://github.com/vlm-run/vlmrun-hub"
32 | repository = "https://github.com/vlm-run/vlmrun-hub"
33 | documentation = "https://docs.vlm.run/hub"
34 | 
[tool.pytest.ini_options]
# pytest reads its pyproject.toml settings only from this table; a bare [pytest]
# table is ignored, so the live-logging options are declared here.
log_cli = true
log_cli_level = "INFO"
addopts = "--show-capture=no -sv -m 'not (skip) and not (benchmark)' -p no:warnings"
markers = [
    "skip: Skip test",
    "benchmark: Benchmark tests",
]
45 | 
46 | [tool.setuptools.dynamic]
47 | version = {attr = "vlmrun.hub.version.__version__"}
48 | dependencies = {file = ["requirements/requirements.txt"]}
49 | 
50 |     [tool.setuptools.dynamic.optional-dependencies]
51 |     test = {file = ["requirements/requirements.test.txt"]}
52 |     build = {file = ["requirements/requirements.build.txt"]}
53 |     extra = {file = ["requirements/requirements.extra.txt"]}
54 |     server = {file = ["requirements/requirements.server.txt"]}
55 |     all = {file = ["requirements/requirements.test.txt", "requirements/requirements.build.txt", "requirements/requirements.extra.txt", "requirements/requirements.server.txt"]}
56 | 
57 | [tool.setuptools.packages.find]
58 | where = ["."]
59 | include = [
60 |     "vlmrun.hub*",
61 |     "vlmrun.hub.schemas*",
62 |     "vlmrun.hub.schemas.contrib*"
63 | ]
64 | namespaces = true
65 | 
66 | 
67 | [tool.black]
68 | line-length = 120
69 | 
70 | [tool.ruff]
71 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
72 | lint.select = [
73 |     "E",  # pycodestyle errors
74 |     "W",  # pycodestyle warnings
75 |     "F",  # pyflakes
76 |     "I",  # isort
77 |     "C",  # flake8-comprehensions
78 |     "B",  # flake8-bugbear
79 | ]
80 | lint.ignore = [
81 |     "E501",  # E501: Line too long
82 |     "C901",  # C901: Function is too complex
83 |     "B008",  # do not perform function calls in argument defaults
84 |     "B904",  # B904: check for raise from None
85 | ]
86 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
  1 | name: Python Publish
  2 | 
  3 | on:
  4 |   push:
  5 |     paths:
  6 |       - "vlmrun/hub/version.py"
  7 |     branches:
  8 |       - main
  9 | 
 10 | env:
 11 |   CACHE_NUMBER: 1 # increase to reset cache manually
 12 | 
 13 | jobs:
 14 |   test:
 15 |     name: Test
 16 |     runs-on: ubuntu-latest
 17 |     timeout-minutes: 20
 18 |     environment: dev
 19 |     steps:
 20 |       - name: Checkout git repo
 21 |         uses: actions/checkout@v3
 22 | 
 23 |       - uses: actions/setup-python@v5
 24 |         with:
 25 |           python-version: "3.10"
 26 | 
 27 |       - uses: actions/cache@v4
 28 |         with:
 29 |           path: ~/.cache/pip
 30 |           key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }}
 31 |           restore-keys: |
 32 |             pip-
 33 | 
 34 |       - name: Install dependencies
 35 |         if: steps.cache.outputs.cache-hit != 'true'
 36 |         run: |
 37 |           python -m pip install --upgrade pip
 38 |           pip install -e '.[test]'
 39 | 
 40 |       - name: Quality Check
 41 |         uses: pre-commit/action@v3.0.1
 42 |         continue-on-error: true
 43 | 
 44 |       - name: Run tests
 45 |         env:
 46 |           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
 47 |           VLMRUN_BASE_URL: ${{ vars.VLMRUN_BASE_URL }}
 48 |           VLMRUN_API_KEY: ${{ secrets.VLMRUN_API_KEY }}
 49 |         run: |
 50 |           make test
 51 | 
 52 |   publish:
 53 |     name: Publish
 54 |     runs-on: ubuntu-latest
 55 |     timeout-minutes: 20
 56 |     environment: prod
 57 |     needs: test
 58 |     steps:
 59 |       - name: Checkout git repo
 60 |         uses: actions/checkout@v3
 61 |         with:
 62 |           fetch-depth: 0
 63 |           token: ${{ secrets.GH_TOKEN }}
 64 | 
 65 |       - uses: actions/setup-python@v5
 66 |         with:
 67 |           python-version: "3.10"
 68 | 
 69 |       - uses: actions/cache@v4
 70 |         with:
 71 |           path: ~/.cache/pip
 72 |           key: pip-${{ hashFiles('requirements/requirements*.txt') }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('MANIFEST.in') }}-${{ env.CACHE_NUMBER }}
 73 |           restore-keys: |
 74 |             pip-
 75 | 
 76 |       - name: Install dependencies
 77 |         if: steps.cache.outputs.cache-hit != 'true'
 78 |         run: |
 79 |           python -m pip install --upgrade pip
 80 |           pip install -e '.[test,build]'
 81 | 
 82 |       - name: Bump version
 83 |         if: success()
 84 |         run: |
 85 |           version=$(grep -oP '__version__ = "\K[^"]+' vlmrun/hub/version.py)
 86 |           echo "Current version: ${version}"
 87 | 
 88 |           git config --local user.email "github-actions[bot]@users.noreply.github.com"
 89 |           git config --local user.name "github-actions[bot]"
 90 | 
 91 |           git tag -a "v${version}" -m "Version ${version}"
 92 |           git push origin main
 93 |           git push origin "v${version}"
 94 | 
 95 |       - name: Build package
 96 |         run: |
 97 |           python -m build
 98 | 
 99 |       - name: Publish to PyPI
100 |         uses: pypa/gh-action-pypi-publish@release/v1
101 |         with:
102 |           password: ${{ secrets.PYPI_TOKEN }}
103 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/food/nutrition_facts_label.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class NutrientInfo(BaseModel):
 7 |     amount: Optional[float] = Field(None, description="The amount of the nutrient")
 8 |     unit: Optional[str] = Field(None, description="The unit of measurement (g, mg, mcg, etc.)")
 9 |     daily_value_percent: Optional[float] = Field(None, description="The percentage of daily value")
10 | 
11 | 
class NutritionFactsLabel(BaseModel):
    # Serving information.
    serving_size: Optional[str] = Field(
        default=None,
        description="The serving size, typically represented as a quantity with unit",
    )
    servings_per_container: Optional[float] = Field(
        default=None,
        description="Number of servings per container",
    )
    calories: Optional[int] = Field(default=None, description="Total calories per serving")

    # Fats.
    total_fat: Optional[NutrientInfo] = Field(default=None, description="Total fat content per serving")
    saturated_fat: Optional[NutrientInfo] = Field(default=None, description="Saturated fat content per serving")
    trans_fat: Optional[NutrientInfo] = Field(default=None, description="Trans fat content per serving")
    polyunsaturated_fat: Optional[NutrientInfo] = Field(
        default=None,
        description="Polyunsaturated fat content per serving",
    )
    monounsaturated_fat: Optional[NutrientInfo] = Field(
        default=None,
        description="Monounsaturated fat content per serving",
    )

    # Cholesterol and sodium.
    cholesterol: Optional[NutrientInfo] = Field(default=None, description="Cholesterol content per serving")
    sodium: Optional[NutrientInfo] = Field(default=None, description="Sodium content per serving")

    # Carbohydrates.
    total_carbohydrate: Optional[NutrientInfo] = Field(
        default=None,
        description="Total carbohydrate content per serving",
    )
    dietary_fiber: Optional[NutrientInfo] = Field(default=None, description="Dietary fiber content per serving")
    total_sugars: Optional[NutrientInfo] = Field(default=None, description="Total sugars content per serving")
    added_sugars: Optional[NutrientInfo] = Field(default=None, description="Added sugars content per serving")
    sugar_alcohols: Optional[NutrientInfo] = Field(default=None, description="Sugar alcohols content per serving")

    # Protein.
    protein: Optional[NutrientInfo] = Field(default=None, description="Protein content per serving")

    # Vitamins and minerals
    vitamin_d: Optional[NutrientInfo] = Field(default=None, description="Vitamin D content per serving")
    calcium: Optional[NutrientInfo] = Field(default=None, description="Calcium content per serving")
    iron: Optional[NutrientInfo] = Field(default=None, description="Iron content per serving")
    potassium: Optional[NutrientInfo] = Field(default=None, description="Potassium content per serving")
    vitamin_a: Optional[NutrientInfo] = Field(default=None, description="Vitamin A content per serving")
    vitamin_c: Optional[NutrientInfo] = Field(default=None, description="Vitamin C content per serving")

    # Additional nutrients that might be present
    additional_nutrients: Optional[Dict[str, NutrientInfo]] = Field(
        default=None,
        description="Additional nutrients not covered by standard fields",
    )

    # Additional information
    ingredients: Optional[str] = Field(default=None, description="List of ingredients")
    allergens: Optional[List[str]] = Field(default=None, description="List of allergens")
    manufacturer: Optional[str] = Field(default=None, description="Manufacturer or distributor of the product")
    product_name: Optional[str] = Field(default=None, description="Name of the product")
50 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/receipt.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from typing import Any, Dict, List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class Address(BaseModel):
 8 |     street: Optional[str] = Field(None, description="Street address")
 9 |     city: Optional[str] = Field(None, description="City")
10 |     state: Optional[str] = Field(None, description="State")
11 |     postal_code: Optional[str] = Field(None, description="Postal code")
12 |     country: Optional[str] = Field(None, description="Country")
13 | 
14 | 
class Item(BaseModel):
    # A purchased line item: only the description is required.
    description: str = Field(..., description="Description or name of the item")
    quantity: Optional[float] = Field(default=None, description="Quantity of the item")
    unit_price: Optional[float] = Field(default=None, description="Unit price of the item")
    total_price: Optional[float] = Field(default=None, description="Total price of the item")
20 | 
21 | 
class PaymentMethod(BaseModel):
    # The payment type is required; the card details are optional.
    type: str = Field(..., description="Type of payment (e.g., cash, credit card, debit card)")
    card_last_4: Optional[str] = Field(default=None, description="Last 4 digits of the card if applicable")
    card_type: Optional[str] = Field(default=None, description="Type of card if applicable")
26 | 
27 | 
class Receipt(BaseModel):
    # Required fields: items, total, currency and payment_method. All others default to None.

    # Identification and timing.
    receipt_id: Optional[str] = Field(default=None, description="Unique receipt identifier")
    transaction_date: Optional[datetime] = Field(default=None, description="Date and time of the transaction")

    # Merchant.
    merchant_name: Optional[str] = Field(default=None, description="Name of the merchant")
    merchant_address: Optional[Address] = Field(default=None, description="Address of the merchant")
    merchant_phone: Optional[str] = Field(default=None, description="Phone number of the merchant")

    # Point of sale.
    cashier_name: Optional[str] = Field(default=None, description="Name of the cashier")
    register_number: Optional[str] = Field(default=None, description="Register or POS terminal number")

    # Customer.
    customer_name: Optional[str] = Field(default=None, description="Name of the customer if provided")
    customer_id: Optional[str] = Field(default=None, description="Customer ID or loyalty number if applicable")

    # Purchased items (required).
    items: List[Item] = Field(..., description="Items purchased")

    # Amounts.
    subtotal: Optional[float] = Field(default=None, description="Subtotal of the purchase")
    tax: Optional[float] = Field(default=None, description="Tax amount")
    total: float = Field(..., description="Total amount of the purchase")
    currency: str = Field(..., description="Currency of the transaction")

    # Payment (required).
    payment_method: PaymentMethod = Field(..., description="Method of payment")

    # Discounts and tips.
    discount_amount: Optional[float] = Field(default=None, description="Amount of discount applied")
    discount_description: Optional[str] = Field(default=None, description="Description of the discount")
    tip_amount: Optional[float] = Field(default=None, description="Tip amount if applicable")

    # Miscellaneous printed information.
    return_policy: Optional[str] = Field(default=None, description="Return policy information")
    barcode: Optional[str] = Field(default=None, description="Barcode or QR code data if present")
    additional_charges: Optional[List[Dict]] = Field(
        default=None,
        description="Any additional charges (e.g., service fees)",
    )
    notes: Optional[str] = Field(default=None, description="Any additional notes or comments")
    others: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Other information on the receipt not captured by other fields",
    )
66 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/request_for_proposal.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class ContactPerson(BaseModel):
 8 |     name: Optional[str] = Field(None, description="Name of the contact person for the RFP")
 9 |     title: Optional[str] = Field(None, description="Title or position of the contact person")
10 |     email: Optional[str] = Field(None, description="Email address of the contact person")
11 |     phone: Optional[str] = Field(None, description="Phone number of the contact person")
12 | 
13 | 
# A single contractor responsibility, wrapped in a model with one required text field.
class Responsibility(BaseModel):
    description: str = Field(..., description="Description of the contractor responsibility")
16 | 
17 | 
class EvaluationCriterion(BaseModel):
    # One evaluation criterion: the description is required, the weight may be absent.
    description: str = Field(..., description="Description of the evaluation criterion")
    weight: Optional[float] = Field(
        default=None,
        description="Weight or importance of this criterion (if specified)",
    )
21 | 
22 | 
class RFP(BaseModel):
    """Request for Proposal (RFP) schema for extracting information from RFP documents."""

    # Every field is optional and defaults to None.

    # --- Headline facts ---
    title: Optional[str] = Field(default=None, description="Title of the Request for Proposal")
    submission_deadline: Optional[date] = Field(default=None, description="Deadline date for proposal submissions")
    governing_law: Optional[str] = Field(
        default=None,
        description="Governing law or jurisdiction that applies to the contract",
    )
    duration_of_contract: Optional[str] = Field(
        default=None,
        description="Overall period of performance for the contract",
    )
    budget_cost_estimate: Optional[str] = Field(
        default=None,
        description="Estimated budget or cost range for the project",
    )

    # --- Nested structures (models defined alongside this class) ---
    rfp_contact_person: Optional[ContactPerson] = Field(
        default=None,
        description="Contact person information for inquiries about the RFP",
    )
    responsibilities_of_contractor: Optional[List[Responsibility]] = Field(
        default=None,
        description="List of responsibilities expected from the contractor",
    )
    evaluation_criteria: Optional[List[EvaluationCriterion]] = Field(
        default=None,
        description="Criteria used to evaluate and score proposals",
    )

    # --- Submission logistics and requirements (free text) ---
    proposal_submission_location: Optional[str] = Field(
        default=None,
        description="Physical or electronic location where proposals should be submitted",
    )
    insurance_requirements: Optional[str] = Field(
        default=None,
        description="Insurance requirements for the contractor",
    )
    project_timeline: Optional[str] = Field(
        default=None,
        description="Expected timeline for project completion",
    )
    eligibility_requirements: Optional[str] = Field(
        default=None,
        description="Requirements that bidders must meet to be eligible",
    )
    proposal_format_requirements: Optional[str] = Field(
        default=None,
        description="Required format, structure, or content for submitted proposals",
    )
    question_submission_deadline: Optional[date] = Field(
        default=None,
        description="Deadline for potential bidders to submit questions",
    )
    pre_proposal_conference_details: Optional[str] = Field(
        default=None,
        description="Details about any pre-proposal meetings or conferences",
    )

    # --- Provenance ---
    issuing_organization: Optional[str] = Field(default=None, description="Organization that issued the RFP")
    amendment_history: Optional[List[str]] = Field(
        default=None,
        description="History of amendments or changes to the original RFP",
    )
77 | 


--------------------------------------------------------------------------------
/tests/test_catalog.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | from pydantic import BaseModel
 5 | 
 6 | from vlmrun.hub.registry import SchemaCatalogYaml
 7 | 
 8 | 
 9 | @pytest.mark.parametrize(
10 |     "catalog_path",
11 |     [
12 |         Path(__file__).parent.parent / "vlmrun" / "hub" / "catalog.yaml",
13 |         Path(__file__).parent.parent / "vlmrun" / "hub" / "schemas" / "contrib" / "catalog.yaml",
14 |     ],
15 | )
16 | def test_catalog_yaml(catalog_path):
17 |     """Test that catalog.yaml is valid and follows the expected structure."""
18 |     assert catalog_path.exists(), "catalog.yaml file not found"
19 | 
20 |     # Load the catalog
21 |     catalog = SchemaCatalogYaml.from_yaml(catalog_path)
22 | 
23 |     # Basic validation
24 |     assert catalog.apiVersion == "v1", "API version must be v1"
25 |     assert len(catalog.schemas) > 0, "Catalog must contain at least one schema"
26 | 
27 |     # Schema-specific validation
28 |     for entry in catalog.schemas:
29 |         # Domain format validation
30 |         assert "." in entry.domain, "Domain must be in format: category.name"
31 |         category, name = entry.domain.split(".", 1)
32 |         assert category and name, "Both category and name must be non-empty"
33 | 
34 |         # Schema path validation
35 |         assert entry.schema_path.startswith("vlmrun.hub.schemas."), "Schema must be in vlmrun.hub.schemas package"
36 | 
37 |         # Version format validation (basic semver check)
38 |         if entry.version:
39 |             version_parts = entry.version.split(".")
40 |             assert len(version_parts) == 3, "Version must follow semver format (X.Y.Z)"
41 |             assert all(part.isdigit() for part in version_parts), "Version parts must be numeric"
42 | 
43 |         # Metadata validation
44 |         if entry.metadata:
45 |             if entry.metadata.supported_inputs:
46 |                 assert isinstance(entry.metadata.supported_inputs, list), "Supported inputs must be a list"
47 |                 assert len(entry.metadata.supported_inputs) > 0, "Must have at least one supported input"
48 |             if entry.metadata.tags:
49 |                 assert isinstance(entry.metadata.tags, list), "Tags must be a list"
50 |                 assert len(entry.metadata.tags) > 0, "Must have at least one tag"
51 |                 assert all(isinstance(tag, str) for tag in entry.metadata.tags), "All tags must be strings"
52 | 
53 |         # Content validation
54 |         assert len(entry.prompt) >= 10, "Prompt must be descriptive (min 10 chars)"
55 |         assert len(entry.description) >= 20, "Description must be detailed (min 20 chars)"
56 | 
57 |         # Dynamic schema validation
58 |         try:
59 |             schema_class = entry.schema_class
60 |             assert issubclass(schema_class, BaseModel), f"Schema {entry.schema} must be a Pydantic model"
61 |         except Exception as e:
62 |             pytest.fail(f"Unable to import {entry.schema}: {e}")
63 | 
64 | 
def test_catalog_yaml_with_refs():
    """Test that catalog.yaml with refs is valid and follows the expected structure.

    Loads ``full-catalog.yaml`` and checks that it holds exactly as many
    schemas as the main and contrib ``catalog.yaml`` files combined.
    """
    hub_dir = Path(__file__).parent.parent / "vlmrun" / "hub"

    full_catalog_path = hub_dir / "full-catalog.yaml"
    assert full_catalog_path.exists(), "full-catalog.yaml file not found"

    # Load the aggregated catalog and check its header.
    full_catalog = SchemaCatalogYaml.from_yaml(full_catalog_path)
    assert full_catalog.apiVersion == "v1", "API version must be v1"

    # The aggregated catalog should hold one entry per schema in each source catalog.
    source_catalog_paths = (
        hub_dir / "catalog.yaml",
        hub_dir / "schemas" / "contrib" / "catalog.yaml",
    )
    expected_count = sum(len(SchemaCatalogYaml.from_yaml(path).schemas) for path in source_catalog_paths)
    assert len(full_catalog.schemas) == expected_count, "Catalog must contain the correct number of schemas"
84 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .env.dev
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
152 | # pytype static type analyzer
153 | .pytype/
154 | 
155 | # Cython debug symbols
156 | cython_debug/
157 | 
158 | # PyCharm
159 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
162 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
163 | #.idea/
164 | /helpers
165 | *.env.*
166 | .DS_Store
167 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/healthcare/hipaa_release.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class HealthInformation(BaseModel):
 8 |     full_disclosure: Optional[bool] = Field(
 9 |         default=None, description="Indicates if the full health record is disclosed."
10 |     )
11 |     excluded_information: Optional[List[str]] = Field(
12 |         default=None, description="Types of health records excluded from disclosure."
13 |     )
14 |     other_exclusions: Optional[List[str]] = Field(
15 |         default=None, description="Additional exclusions specified by the patient."
16 |     )
17 |     disclosure_format: Optional[str] = Field(
18 |         default=None, description="The preferred format for disclosing the health records."
19 |     )
20 | 
21 | 
# Party receiving the disclosed information; all fields default to None.
class Recipient(BaseModel):
    name: Optional[str] = Field(None, description="Name of the recipient.")
    organization: Optional[str] = Field(None, description="Organization name if applicable.")
    address: Optional[str] = Field(None, description="Recipient's address.")
26 | 
27 | 
# Period over which the authorization applies: explicit dates, an all-time
# flag, or an expiring event. All fields default to None.
class AuthorizationDuration(BaseModel):
    start_date: Optional[date] = Field(None, description="Start date of authorization.")
    end_date: Optional[date] = Field(None, description="End date of authorization.")
    all_time: Optional[bool] = Field(
        None,
        description="Indicates if authorization applies to all past, present, and future periods.",
    )
    event_based: Optional[str] = Field(None, description="Event upon which authorization expires.")
35 | 
36 | 
# Where revocation requests are sent; all fields default to None.
class RevocationContact(BaseModel):
    name: Optional[str] = Field(None, description="Name of person handling revocation.")
    organization: Optional[str] = Field(
        None,
        description="Organization responsible for processing revocation.",
    )
    address: Optional[str] = Field(None, description="Address for sending revocation requests.")
41 | 
42 | 
# Wrapper holding the optional revocation contact (defaults to None).
class RevocationDetails(BaseModel):
    revocation_contact: Optional[RevocationContact] = Field(
        None,
        description="Details on how the authorization can be revoked.",
    )
47 | 
48 | 
# Details of a legal representative signing the form; all fields default to None.
class LegalRepresentative(BaseModel):
    name: Optional[str] = Field(None, description="Name of the legal representative.")
    signature: Optional[str] = Field(None, description="Signature of the legal representative.")
    authority_description: Optional[str] = Field(
        None,
        description="Description of the legal authority under which they are signing.",
    )
55 | 
56 | 
# Signing details for the release form, with an optional nested legal
# representative. All fields default to None.
class Signature(BaseModel):
    signed_by: Optional[str] = Field(None, description="Name of the individual signing the form.")
    is_signed: Optional[bool] = Field(None, description="Whether the form has been signed.")
    date_signed: Optional[date] = Field(None, description="Date the form was signed.")
    legal_representative: Optional[LegalRepresentative] = Field(
        None,
        description="Details if signed by a legal representative.",
    )
64 | 
65 | 
class HIPAARelease(BaseModel):
    """HIPAA Release Form for authorizing disclosure of health information."""

    # Every field is optional and defaults to None.

    # --- Parties ---
    patient_name: Optional[str] = Field(None, description="Full name of the individual authorizing the release.")
    authorized_entity: Optional[str] = Field(
        None,
        description="Name of the entity or individual authorized to share information.",
    )

    # --- What is disclosed, why, and to whom ---
    health_information: Optional[HealthInformation] = Field(
        None,
        description="Details of the health records to be disclosed.",
    )
    reason_for_disclosure: Optional[str] = Field(None, description="Reason for sharing the health information.")
    recipient: Optional[Recipient] = Field(
        None,
        description="Details of the recipient authorized to receive health information.",
    )

    # --- Validity, revocation and sign-off ---
    authorization_duration: Optional[AuthorizationDuration] = Field(
        None,
        description="Duration of authorization for information disclosure.",
    )
    revocation_details: Optional[RevocationDetails] = Field(
        None,
        description="Details on how the authorization can be revoked.",
    )
    signature: Optional[Signature] = Field(None, description="Signature and authorization details.")
89 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/accounting/w2_form.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
 6 | class Address(BaseModel):
 7 |     street: Optional[str] = Field(None, description="Street address")
 8 |     city: Optional[str] = Field(None, description="City")
 9 |     state: Optional[str] = Field(None, description="State")
10 |     zip_code: Optional[str] = Field(None, description="ZIP code")
11 | 
12 | 
class W2Form(BaseModel):
    """W2 Form schema for extracting information from IRS Form W-2 (Wage and Tax Statement)."""

    # Every field is optional and defaults to None. The box label each field
    # maps to is given in its description.

    # --- Identification (Boxes a-e) ---
    ssn: Optional[str] = Field(default=None, description="Employee's Social Security Number (SSN) (Box a)")
    ein: Optional[str] = Field(default=None, description="Employer Identification Number (EIN) (Box b)")

    employer_name: Optional[str] = Field(default=None, description="Full name of the employer (Box c)")
    employer_address: Optional[Address] = Field(default=None, description="Employer's complete address (Box c)")

    control_number: Optional[str] = Field(default=None, description="Control number assigned to the W2 form (Box d)")

    employee_name: Optional[str] = Field(default=None, description="Full name of the employee (Box e)")
    employee_address: Optional[Address] = Field(default=None, description="Employee's complete address (Box e)")

    # --- Federal amounts ---
    wages_tips_other_compensation: Optional[float] = Field(
        default=None,
        description="Wages, tips, and other compensation (Box 1)",
    )
    federal_income_tax_withheld: Optional[float] = Field(
        default=None,
        description="Federal income tax withheld (Box 2)",
    )
    social_security_wages: Optional[float] = Field(default=None, description="Social security wages (Box 3)")
    social_security_tax_withheld: Optional[float] = Field(
        default=None,
        description="Social security tax withheld (Box 4)",
    )
    medicare_wages_and_tips: Optional[float] = Field(default=None, description="Medicare wages and tips (Box 5)")
    medicare_tax_withheld: Optional[float] = Field(default=None, description="Medicare tax withheld (Box 6)")
    social_security_tips: Optional[float] = Field(default=None, description="Social security tips (Box 7)")
    allocated_tips: Optional[float] = Field(default=None, description="Allocated tips (Box 8)")
    dependent_care_benefits: Optional[float] = Field(default=None, description="Dependent care benefits (Box 10)")
    nonqualified_plans: Optional[float] = Field(default=None, description="Nonqualified plans (Box 11)")
    # NOTE(review): Box 12 is also what the *_code/*_value pairs (12a-12d) at the
    # end of this class describe; confirm which part of the form this field targets.
    total_wages: Optional[float] = Field(default=None, description="Total wages (Box 12)")

    # --- Checkboxes ---
    statutory_employee: Optional[bool] = Field(
        default=None,
        description="Statutory employee checkbox value (Box 13)",
    )
    retirement_plan: Optional[bool] = Field(default=None, description="Retirement plan checkbox value (Box 13)")
    third_party_sick_pay: Optional[bool] = Field(
        default=None,
        description="Third party sick pay checkbox value (Box 13)",
    )
    # NOTE(review): presumably Box 14 ("Other") is free-form on the IRS form
    # rather than a checkbox; verify that a bool is the intended type here.
    other_wages: Optional[bool] = Field(default=None, description="Other wages checkbox value (Box 14)")

    # --- State and local amounts (Boxes 15-20) ---
    employers_state_id_number: Optional[str] = Field(default=None, description="Employer's state ID number (Box 15)")
    state_wages: Optional[float] = Field(default=None, description="State wages (Box 16)")
    state_income_tax_withheld: Optional[float] = Field(default=None, description="State income tax withheld (Box 17)")
    local_wages: Optional[float] = Field(default=None, description="Local wages (Box 18)")
    local_income_tax_withheld: Optional[float] = Field(default=None, description="Local income tax withheld (Box 19)")
    locality_name: Optional[str] = Field(default=None, description="Locality name (Box 20)")

    form_year: Optional[int] = Field(
        default=None,
        description="Tax year for which the W2 form is issued, usually on the bottom in bold.",
    )

    # --- Box 12a-12d code/value pairs ---
    a_code: Optional[str] = Field(default=None, description="Code entered on the left side of Box 12a (Box 12a)")
    a_value: Optional[float] = Field(default=None, description="Value entered on the right side of Box 12a (Box 12a)")
    b_code: Optional[str] = Field(default=None, description="Code entered on the left side of Box 12b (Box 12b)")
    b_value: Optional[float] = Field(default=None, description="Value entered on the right side of Box 12b (Box 12b)")
    c_code: Optional[str] = Field(default=None, description="Code entered on the left side of Box 12c (Box 12c)")
    c_value: Optional[float] = Field(default=None, description="Value entered on the right side of Box 12c (Box 12c)")
    d_code: Optional[str] = Field(default=None, description="Code entered on the left side of Box 12d (Box 12d)")
    d_value: Optional[float] = Field(default=None, description="Value entered on the right side of Box 12d (Box 12d)")
65 | 


--------------------------------------------------------------------------------
/tests/test_openai.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import List, Optional, Type
  3 | 
  4 | import pytest
  5 | from loguru import logger
  6 | from pydantic import BaseModel
  7 | 
  8 | pytestmark = pytest.mark.skipif(
  9 |     not os.getenv("OPENAI_API_KEY", False), reason="This test requires OPENAI_API_KEY to be set"
 10 | )
 11 | 
 12 | 
 13 | @pytest.fixture
 14 | def openai_client():
 15 |     from openai import OpenAI
 16 | 
 17 |     return OpenAI()
 18 | 
 19 | 
 20 | def test_openai_structured_outputs_simple(openai_client):
 21 |     from pydantic import Field
 22 | 
 23 |     from vlmrun.common.image import encode_image
 24 |     from vlmrun.common.utils import remote_image
 25 | 
 26 |     invoice_url = "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg"
 27 |     invoice_image = remote_image(invoice_url)
 28 | 
 29 |     class Address(BaseModel):
 30 |         street: Optional[str] = Field(None, description="Street address")
 31 |         city: Optional[str] = Field(None, description="City")
 32 |         state: Optional[str] = Field(None, description="State")
 33 |         postal_code: Optional[str] = Field(None, description="Postal code")
 34 |         country: Optional[str] = Field(None, description="Country")
 35 | 
 36 |     class Item(BaseModel):
 37 |         description: Optional[str] = Field(None, description="Description or name of the item")
 38 |         quantity: Optional[int] = Field(None, description="Quantity of the item")
 39 |         currency: Optional[str] = Field(None, description="3-digit currency code")
 40 |         unit_price: Optional[float] = Field(None, description="Unit price of the item")
 41 |         total_price: Optional[float] = Field(None, description="Total price of the item")
 42 | 
 43 |     class Invoice(BaseModel):
 44 |         invoice_id: Optional[str] = Field(None, description="Unique invoice identifier")
 45 |         invoice_issue_date: Optional[str] = Field(None, description="Issue date of the invoice")
 46 | 
 47 |         customer_billing_address: Optional[Address] = Field(None, description="Recipient's billing address")
 48 |         customer_shipping_address: Optional[Address] = Field(None, description="Recipient's shipping address")
 49 | 
 50 |         items: Optional[List[Item]] = Field(None, description="Items in the invoice")
 51 |         subtotal: Optional[float] = Field(None, description="Subtotal of the invoice")
 52 |         tax: Optional[float] = Field(None, description="Tax of the invoice")
 53 |         total: Optional[float] = Field(None, description="Total of the invoice")
 54 |         currency: Optional[str] = Field(None, description="Currency of the invoice")
 55 | 
 56 |     response = openai_client.beta.chat.completions.parse(
 57 |         model="gpt-4o-mini",
 58 |         messages=[
 59 |             {
 60 |                 "role": "user",
 61 |                 "content": [
 62 |                     {"type": "text", "text": "Extract the invoice in JSON."},
 63 |                     *[
 64 |                         {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}}
 65 |                         for img in [invoice_image]
 66 |                     ],
 67 |                 ],
 68 |             },
 69 |         ],
 70 |         response_format=Invoice,
 71 |         temperature=0,
 72 |     )
 73 |     logger.info(response.choices[0].message.parsed.model_dump_json(indent=2))
 74 | 
 75 | 
 76 | @pytest.mark.benchmark
 77 | @pytest.mark.skip(reason="This test is not working due to the patch_response_format function")
 78 | def test_openai_structured_outputs_hub_dataset(openai_client):
 79 |     from vlmrun.common.image import encode_image
 80 |     from vlmrun.hub.dataset import VLMRUN_HUB_DATASET
 81 |     from vlmrun.hub.utils import patch_response_format
 82 | 
 83 |     for sample in VLMRUN_HUB_DATASET.values():
 84 |         response_model: Type[BaseModel] = sample.response_model
 85 |         response = openai_client.beta.chat.completions.parse(
 86 |             model="gpt-4o-mini",
 87 |             messages=[
 88 |                 {
 89 |                     "role": "user",
 90 |                     "content": [
 91 |                         {"type": "text", "text": sample.prompt},
 92 |                         *[
 93 |                             {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}}
 94 |                             for img in [
 95 |                                 sample.image,
 96 |                             ]
 97 |                         ],
 98 |                     ],
 99 |                 },
100 |             ],
101 |             response_format=patch_response_format(response_model),
102 |             temperature=0,
103 |         )
104 |         logger.info(response.model_dump_json(indent=2))
105 | 


--------------------------------------------------------------------------------
/tests/benchmarks/2025-02-20-bsahane-Qwen2.5-VL-7B-Instruct-Q4_K_M_benxh-instructor-results.md:
--------------------------------------------------------------------------------
 1 | ## Benchmark Results (model=bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh, date=2025-02-20)
 2 | 
 3 | <table>
 4 | <tr>
 5 | <td style='width: 5%;'> Domain </td>
 6 | <td style='width: 5%;'> Response Model </td>
 7 | <td style='width: 40%;'> Sample </td>
 8 | <td style='width: 50%;'> Response JSON </td>
 9 | </tr>
10 |     <tr><td> <kbd>document.bank-statement</kbd> </td>
11 | <td> <kbd>BankStatement</kbd> </td>
12 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.bank-statement/lending_bankstatement.pdf' width='100%' /> </td>
13 | <td> <pre>{<br>  "account_number": null,<br>  "account_type": null,<br>  "bank_address": null,<br>  "bank_name": null,<br>  "client_address": null,<br>  "client_name": null,<br>  "ending_balance": null,<br>  "starting_balance": null,<br>  "statement_date": null,<br>  "statement_start_date": null,<br>  "statement_end_date": null,<br>  "table_item": [],<br>  "others": null<br>}</pre> </td>
14 | </tr><tr><td> <kbd>document.invoice</kbd> </td>
15 | <td> <kbd>Invoice</kbd> </td>
16 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg' width='100%' /> </td>
17 | <td> <pre>❌</pre> </td>
18 | </tr><tr><td> <kbd>document.receipt</kbd> </td>
19 | <td> <kbd>Receipt</kbd> </td>
20 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.receipt/sample_receipt.webp' width='100%' /> </td>
21 | <td> <pre>❌</pre> </td>
22 | </tr><tr><td> <kbd>document.resume</kbd> </td>
23 | <td> <kbd>Resume</kbd> </td>
24 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.resume/fake-resume.webp' width='100%' /> </td>
25 | <td> <pre>❌</pre> </td>
26 | </tr><tr><td> <kbd>document.us-drivers-license</kbd> </td>
27 | <td> <kbd>USDriversLicense</kbd> </td>
28 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.us-drivers-license/dl3.jpg' width='100%' /> </td>
29 | <td> <pre>❌</pre> </td>
30 | </tr><tr><td> <kbd>document.utility-bill</kbd> </td>
31 | <td> <kbd>UtilityBill</kbd> </td>
32 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.utility-bill/utility-bill-example.webp' width='100%' /> </td>
33 | <td> <pre>❌</pre> </td>
34 | </tr><tr><td> <kbd>document.w2-form</kbd> </td>
35 | <td> <kbd>W2Form</kbd> </td>
36 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.w2-form/w2-form.jpg' width='100%' /> </td>
37 | <td> <pre>❌</pre> </td>
38 | </tr><tr><td> <kbd>aerospace.remote-sensing</kbd> </td>
39 | <td> <kbd>RemoteSensing</kbd> </td>
40 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/aerospace.remote-sensing/planet_labs_port.jpg' width='100%' /> </td>
41 | <td> <pre>{<br>  "description": "The satellite image shows an airport with visible runways and surrounding infrastructure, indicating a clear view from space.",<br>  "objects": [<br>    "runway"<br>  ],<br>  "categories": [<br>    "airport"<br>  ],<br>  "is_visible": true<br>}</pre> </td>
42 | </tr><tr><td> <kbd>healthcare.medical-insurance-card</kbd> </td>
43 | <td> <kbd>MedicalInsuranceCard</kbd> </td>
44 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/healthcare.medical-insurance-card/blue_cross_example.jpg' width='100%' /> </td>
45 | <td> <pre>❌</pre> </td>
46 | </tr><tr><td> <kbd>retail.ecommerce-product-caption</kbd> </td>
47 | <td> <kbd>RetailEcommerceProductCaption</kbd> </td>
48 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/retail.ecommerce-product-caption/Electronics%20-%20Kindle.webp' width='100%' /> </td>
49 | <td> <pre>{<br>  "description": "A sleek, modern e-reader with a vibrant display and intuitive interface.",<br>  "rating": 85,<br>  "name": "E-Reader Pro",<br>  "brand": "TechGenius",<br>  "category": "Electronics / E-readers",<br>  "price": "$299.99",<br>  "color": "Black"<br>}</pre> </td>
50 | </tr><tr><td> <kbd>media.tv-news</kbd> </td>
51 | <td> <kbd>TVNews</kbd> </td>
52 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.tv-news/bbc_news_ukraine_screenshot.jpg' width='100%' /> </td>
53 | <td> <pre>{<br>  "description": "[img-0]",<br>  "chyron": null,<br>  "network": null,<br>  "reporters": []<br>}</pre> </td>
54 | </tr><tr><td> <kbd>document.us-passport</kbd> </td>
55 | <td> <kbd>USPassport</kbd> </td>
56 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.us-passport/2020-Next-Gen-US-Passport.png' width='100%' /> </td>
57 | <td> <pre>❌</pre> </td>
58 | </tr><tr><td> <kbd>media.nfl-game-state</kbd> </td>
59 | <td> <kbd>NFLGameState</kbd> </td>
60 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png' width='100%' /> </td>
61 | <td> <pre>❌</pre> </td>
62 | </tr><tr><td> <kbd>media.nba-game-state</kbd> </td>
63 | <td> <kbd>NBAGameState</kbd> </td>
64 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nba-game-state/heats_spurs.png' width='100%' /> </td>
65 | <td> <pre>❌</pre> </td>
66 | </tr>
67 | </table>
68 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/logistics/bill_of_lading.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class Address(BaseModel):
 8 |     street: Optional[str] = Field(None, description="Street address")
 9 |     city: Optional[str] = Field(None, description="City")
10 |     state: Optional[str] = Field(None, description="State or province")
11 |     postal_code: Optional[str] = Field(None, description="Postal code or ZIP code")
12 |     country: Optional[str] = Field(None, description="Country")
13 | 
14 | 
# Contact channels for a party; all fields are optional strings defaulting to None.
class Contact(BaseModel):
    phone: Optional[str] = Field(default=None, description="Phone number")
    email: Optional[str] = Field(default=None, description="Email address")
    fax: Optional[str] = Field(default=None, description="Fax number")
19 | 
20 | 
# A named party with optional nested address and contact details.
# All fields default to None.
class Party(BaseModel):
    name: Optional[str] = Field(default=None, description="Name of the party")
    address: Optional[Address] = Field(default=None, description="Address of the party")
    contact: Optional[Contact] = Field(
        default=None,
        description="Contact information (phone, email, etc.)",
    )
    reference: Optional[str] = Field(default=None, description="Reference number or identifier")
26 | 
27 | 
# One shipping container listed on the bill of lading.
class Container(BaseModel):
    number: Optional[str] = Field(None, description="Container number")
    seal_number: Optional[str] = Field(None, description="Seal number")
    type: Optional[str] = Field(None, description="Container type")
    # NOTE(review): weight is a bare float with no accompanying unit field on this model.
    weight: Optional[float] = Field(None, description="Weight of the container")
    measurement: Optional[str] = Field(None, description="Measurement or dimensions of the container")
34 | 
35 | 
# Description of the cargo: package count/type, weight and volume (each paired
# with its own unit field), package markings and dangerous-goods remarks.
class Goods(BaseModel):
    description: Optional[str] = Field(None, description="Description of the goods")
    packages: Optional[int] = Field(None, description="Number of packages")
    package_type: Optional[str] = Field(None, description="Type of packages (cartons, pallets, etc.)")
    weight: Optional[float] = Field(None, description="Weight of the goods")
    weight_unit: Optional[str] = Field(None, description="Unit of weight measurement (kg, lb, etc.)")
    volume: Optional[float] = Field(None, description="Volume of the goods")
    volume_unit: Optional[str] = Field(None, description="Unit of volume measurement (cbm, cft, etc.)")
    marks_and_numbers: Optional[str] = Field(None, description="Marks and numbers on the packages")
    dangerous_goods_info: Optional[str] = Field(None, description="Information about dangerous goods, if applicable")
46 | 
47 | 
# Freight terms, charges and payment information for the shipment.
class FreightDetails(BaseModel):
    freight_terms: Optional[str] = Field(None, description="Terms of freight (prepaid, collect, etc.)")
    freight_charges: Optional[float] = Field(None, description="Freight charges amount")
    currency: Optional[str] = Field(None, description="Currency of the freight charges")
    # NOTE(review): entries are plain dicts, so the keys of each charge are not declared or validated here.
    additional_charges: Optional[List[dict]] = Field(None, description="Additional charges or fees")
    payment_method: Optional[str] = Field(None, description="Method of payment")
54 | 
55 | 
# Top-level schema for a bill of lading. It aggregates the models declared above
# (Party, Container, Goods, FreightDetails); every field is optional and defaults to None.
class BillOfLading(BaseModel):
    # Document identification
    bill_number: Optional[str] = Field(None, description="Bill of Lading number")
    booking_number: Optional[str] = Field(None, description="Booking or reference number")
    issue_date: Optional[date] = Field(None, description="Date of issue of the Bill of Lading")

    # Parties involved in the shipment
    shipper: Optional[Party] = Field(None, description="Shipper or exporter information")
    consignee: Optional[Party] = Field(None, description="Consignee or importer information")
    notify_party: Optional[Party] = Field(None, description="Notify party information")
    forwarding_agent: Optional[Party] = Field(None, description="Forwarding agent information")

    # Vessel and carrier
    vessel_name: Optional[str] = Field(None, description="Name of the vessel")
    voyage_number: Optional[str] = Field(None, description="Voyage number")
    carrier: Optional[str] = Field(None, description="Carrier or shipping line")

    # Routing
    port_of_loading: Optional[str] = Field(None, description="Port of loading")
    port_of_discharge: Optional[str] = Field(None, description="Port of discharge")
    place_of_receipt: Optional[str] = Field(None, description="Place of receipt")
    place_of_delivery: Optional[str] = Field(None, description="Place of delivery")

    # Cargo: a list of containers plus a single Goods record
    containers: Optional[List[Container]] = Field(None, description="List of containers")
    goods: Optional[Goods] = Field(None, description="Details of the goods being shipped")

    # Freight and payment
    freight_details: Optional[FreightDetails] = Field(None, description="Freight and payment details")

    # Free-form remarks
    special_instructions: Optional[str] = Field(None, description="Special instructions or remarks")

    # Count of originals issued
    number_of_original_bills: Optional[int] = Field(None, description="Number of original Bills of Lading issued")

    # Signature block
    signature_place: Optional[str] = Field(None, description="Place of signature")
    signature_date: Optional[date] = Field(None, description="Date of signature")
    signatory: Optional[str] = Field(None, description="Name or title of the signatory")
87 | 


--------------------------------------------------------------------------------
/tests/test_registry.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | import pytest
  4 | from pydantic import BaseModel
  5 | from ruamel.yaml.parser import ParserError
  6 | 
  7 | from vlmrun.hub.registry import Registry, SchemaCatalogItem, SchemaCatalogYaml
  8 | 
  9 | 
@pytest.fixture
def registry():
    """Return a newly constructed ``Registry`` for the requesting test.

    The fixture uses pytest's default (function) scope, so ``Registry()`` is
    called once per test that asks for it.
    """
    return Registry()
 14 | 
 15 | 
def test_registry_singleton():
    """Check that the module-level ``registry`` object is shared across imports.

    Importing ``vlmrun.hub.registry.registry`` twice must yield the very same
    object, and that object must be a ``Registry`` instance.
    """
    # Imported inside the test so both names are bound from the module attribute at call time.
    from vlmrun.hub.registry import registry as registry1
    from vlmrun.hub.registry import registry as registry2

    assert registry1 is registry2
    assert isinstance(registry1, Registry)
 23 | 
 24 | 
 25 | def test_registry_load_schemas(registry):
 26 |     """Test loading schemas from catalog"""
 27 |     registry.load_schemas()
 28 |     assert len(registry.schemas) > 0
 29 | 
 30 |     assert "document.receipt" in registry.schemas
 31 |     assert "document.resume" in registry.schemas
 32 |     assert "document.us-drivers-license" in registry.schemas
 33 | 
 34 | 
def test_registry_getitem(registry):
    """Check subscript access on the registry.

    A known domain must resolve to a pydantic ``BaseModel`` subclass, and an
    unknown domain must raise ``KeyError``.
    """
    schema = registry["document.receipt"]
    assert issubclass(schema, BaseModel)

    # The lookup result is irrelevant; only the raised exception matters.
    with pytest.raises(KeyError):
        _ = registry["non.existent.schema"]
 42 | 
 43 | 
 44 | def test_registry_repr(registry):
 45 |     """Test string representation of registry"""
 46 |     repr_str = repr(registry)
 47 |     assert "Registry [schemas=" in repr_str
 48 |     assert "document.receipt" in repr_str
 49 |     assert "document.resume" in repr_str
 50 | 
 51 | 
 52 | def test_registry_list_schemas(registry):
 53 |     """Test listing available schemas"""
 54 |     schemas = registry.list_schemas()
 55 |     assert isinstance(schemas, list)
 56 |     assert len(schemas) > 0
 57 |     assert "document.receipt" in schemas
 58 |     assert "document.resume" in schemas
 59 | 
 60 | 
def test_schema_catalog_item_validation():
    """Build a ``SchemaCatalogItem`` from keyword arguments and check the stored values.

    The item is constructed with the ``schema`` keyword and read back through
    ``schema_path``, so this also checks that the two names refer to the same value.
    """
    item = SchemaCatalogItem(
        domain="test.domain",
        schema="vlmrun.hub.schemas.document.Receipt",
        prompt="Test prompt",
        description="Test description that is sufficiently detailed",
        supported_inputs=["document"],
        tags=["test"],
    )
    assert item.domain == "test.domain"
    assert item.schema_path == "vlmrun.hub.schemas.document.Receipt"
    # NOTE(review): 10 and 20 presumably mirror minimum-length constraints on the model - confirm in registry.py.
    assert len(item.prompt) >= 10
    assert len(item.description) >= 20
 75 | 
 76 | 
 77 | def test_schema_catalog_yaml_loading():
 78 |     """Test loading catalog from YAML"""
 79 |     catalog_path = Path(__file__).parent.parent / "vlmrun" / "hub" / "catalog.yaml"
 80 |     catalog = SchemaCatalogYaml.from_yaml(catalog_path)
 81 | 
 82 |     assert catalog.apiVersion == "v1"
 83 |     assert isinstance(catalog.schemas, list)
 84 |     assert len(catalog.schemas) > 0
 85 | 
 86 |     if catalog.catalogs:
 87 |         assert isinstance(catalog.catalogs, list)
 88 |         for ref in catalog.catalogs:
 89 |             ref_path = Path(__file__).parent.parent / "vlmrun" / "hub" / ref
 90 |             assert ref_path.exists()
 91 | 
 92 | 
def test_ensure_schemas_loaded_decorator(registry):
    """Check that a lookup works without an explicit ``load_schemas()`` call.

    The test never calls ``load_schemas()`` itself; it only subscripts the
    registry and then expects ``registry.schemas`` to be populated.
    NOTE(review): presumably the loading is triggered by the
    ``ensure_schemas_loaded`` decorator in registry.py - not visible from here.
    """
    schema = registry["document.receipt"]
    assert schema is not None

    assert len(registry.schemas) > 0
 99 | 
100 | 
def test_registry_load_schemas_with_invalid_path(registry):
    """Loading from a non-existent catalog path must raise ``FileNotFoundError``.

    The exception message is additionally required to match "Catalog file not found".
    """
    with pytest.raises(FileNotFoundError, match="Catalog file not found"):
        registry.load_schemas(catalog_paths=("nonexistent.yaml",))
105 | 
106 | 
def test_registry_load_schemas_with_invalid_yaml(registry, tmp_path):
    """Loading a syntactically broken YAML file must raise ruamel's ``ParserError``."""
    # An unterminated flow sequence ("[") makes the document unparseable.
    invalid_yaml = tmp_path / "invalid.yaml"
    invalid_yaml.write_text("invalid: [\nyaml: content")

    with pytest.raises(ParserError):
        registry.load_schemas(catalog_paths=(invalid_yaml,))
114 | 
115 | 
def test_registry_load_schemas_with_invalid_schema(registry, tmp_path):
    """A catalog entry pointing at an unimportable schema must raise ``ValueError``.

    The YAML below is well-formed, but its ``schema`` value names a module that
    does not exist; the error message must mention the failed import target.
    """
    invalid_schema_yaml = """
apiVersion: v1
schemas:
- domain: test.invalid
  schema: nonexistent.module.Schema
  prompt: Test prompt
  description: Test description that is sufficiently detailed
  metadata:
    supported_inputs: ["document"]
    tags: ["test"]
"""
    # Written to pytest's per-test temporary directory.
    test_yaml = tmp_path / "test.yaml"
    test_yaml.write_text(invalid_schema_yaml)

    with pytest.raises(ValueError, match="Unable to import nonexistent.module.Schema"):
        registry.load_schemas(catalog_paths=(test_yaml,))
134 | 
135 | 
def test_registry_detailed_key_error(registry):
    """The ``KeyError`` for an unknown domain must list the schemas that do exist."""
    with pytest.raises(KeyError) as exc_info:
        _ = registry["non.existent.schema"]

    message = str(exc_info.value)
    for fragment in ("Available schemas:", "document.receipt", "document.resume"):
        assert fragment in message
145 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/bank_statement.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | from typing import Dict, List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
# Postal address used for both the bank and the account holder on a statement.
# Every field is optional and defaults to None.
class Address(BaseModel):
    street: Optional[str] = Field(None, description="Street address")
    city: Optional[str] = Field(None, description="City")
    state: Optional[str] = Field(None, description="State/Province code or name")
    zip_code: Optional[str] = Field(None, description="Postal code")
12 | 
13 | 
# One transaction line of a bank statement. Deposits and withdrawals each have
# their own amount/date/description triple; all fields are optional.
class BankTransaction(BaseModel):
    # Deposit fields, then withdrawal fields
    transaction_deposit: Optional[float] = Field(None, description="Deposit amount, if this transaction is a deposit.")
    transaction_deposit_date: Optional[date] = Field(None, description="Date of the deposit, if this transaction is a deposit.")
    transaction_deposit_description: Optional[str] = Field(None, description="Description of the deposit, if this transaction is a deposit.")
    transaction_withdrawal: Optional[float] = Field(None, description="Withdrawal amount, if this transaction is a withdrawal.")
    transaction_withdrawal_date: Optional[date] = Field(None, description="Date of the withdrawal, if this transaction is a withdrawal.")
    transaction_withdrawal_description: Optional[str] = Field(None, description="Description of the withdrawal, if this transaction is a withdrawal.")

    # Per-line extras that apply to either kind of transaction
    check_number: Optional[str] = Field(None, description="Check number associated with the transaction, if applicable (e.g., for check payments or cashed checks).")
    ending_daily_balance: Optional[float] = Field(None, description="The running daily balance of the account after this transaction, if provided on the transaction line.")
    reference_number: Optional[str] = Field(None, description="A specific reference number or transaction ID for this item, if provided in the transaction line (e.g., for deposits, ATM transactions).")
27 | 
28 | 
# Top-level schema for a bank statement: account and bank identification,
# statement period and balances, the list of transactions, summary totals and
# a free-form `others` dictionary. Every field is optional and defaults to None.
class BankStatement(BaseModel):
    # Account, bank and holder identification; balances and statement period
    account_number: Optional[str] = Field(None, description="Bank account number associated with the statement.")
    account_type: Optional[str] = Field(None, description="Type of the bank account (e.g., Checking, Savings, 'CONNECTIONS CHECKING').")
    bank_address: Optional[Address] = Field(None, description="Address of the banking institution.")
    bank_name: Optional[str] = Field(None, description="Name of the banking institution.")
    client_address: Optional[Address] = Field(None, description="Address of the account holder(s).")
    client_name: Optional[str] = Field(None, description="Name of the account holder(s). If multiple, may be a concatenated string (e.g., 'Rachael Dean, Calvin Carrillo').")
    ending_balance: Optional[float] = Field(None, description="The final balance of the account at the end of the statement period.")
    starting_balance: Optional[float] = Field(None, description="The balance of the account at the beginning of the statement period.")
    statement_date: Optional[date] = Field(None, description="The date the statement was issued or generated.")
    statement_start_date: Optional[date] = Field(None, description="The first day of the period covered by this statement.")
    statement_end_date: Optional[date] = Field(None, description="The last day of the period covered by this statement.")
    # Individual transaction lines
    table_item: Optional[List[BankTransaction]] = Field(None, description="A list of individual financial transactions (deposits, withdrawals, checks, etc.) detailed in the statement.")

    # Routing number and overall period totals
    routing_number: Optional[str] = Field(None, description="Bank's routing transit number (RTN), if provided on the statement.")
    total_deposits: Optional[float] = Field(None, description="Summary total of all deposits and other credits for the statement period, as per the statement's summary section.")
    total_withdrawals: Optional[float] = Field(None, description="Summary total of all withdrawals, payments, and other debits for the statement period, as per the statement's summary section.")

    # Per-category summary totals, when the statement reports them
    summary_total_atm_withdrawals: Optional[float] = Field(None, description="Total amount of ATM withdrawals as reported in a summary section of the statement, if available.")
    summary_total_debit_card_purchases: Optional[float] = Field(None, description="Total amount of debit card purchases (e.g., VISA Check Card) as reported in a summary section, if available.")
    summary_total_checks_paid: Optional[float] = Field(None, description="Total amount of checks paid as reported in a summary section, if available (this may differ from a sum of individual check transactions if the summary is specific).")

    # Fees and account services
    monthly_service_fee: Optional[float] = Field(None, description="Amount of the monthly service fee charged during the statement period, if any.")
    overdraft_protection_status: Optional[str] = Field(None, description="Textual description of the overdraft protection status or related services on the account, if mentioned.")

    # Catch-all for data without a dedicated field; the dict's keys and values are unconstrained.
    others: Optional[Dict] = Field(None, description="A dictionary for any other relevant data extracted from the statement that does not fit into the predefined fields.")
59 | 


--------------------------------------------------------------------------------
/tests/test_instructor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Literal
  3 | 
  4 | import pytest
  5 | from conftest import BenchmarkResult, create_benchmark
  6 | from dotenv import load_dotenv
  7 | from loguru import logger
  8 | 
  9 | from vlmrun.common.image import encode_image
 10 | from vlmrun.hub.dataset import VLMRUN_HUB_DATASET, HubSample
 11 | 
 12 | load_dotenv()
 13 | 
 14 | 
 15 | def get_instructor_client(provider: Literal["openai", "gemini", "fireworks", "ollama"] = "openai"):
 16 |     import instructor
 17 |     from openai import OpenAI
 18 | 
 19 |     client = None
 20 |     if provider == "openai":
 21 |         api_key = os.getenv("OPENAI_API_KEY", None)
 22 |         if not api_key:
 23 |             raise ValueError("OPENAI_API_KEY is not set")
 24 |         client = OpenAI(
 25 |             api_key=api_key,
 26 |             base_url="https://api.openai.com/v1",
 27 |         )
 28 |     elif provider == "gemini":
 29 |         api_key = os.getenv("GEMINI_API_KEY", None)
 30 |         if not api_key:
 31 |             raise ValueError("GEMINI_API_KEY is not set")
 32 |         client = OpenAI(
 33 |             api_key=api_key,
 34 |             base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
 35 |         )
 36 |     elif provider == "fireworks":
 37 |         api_key = os.getenv("FIREWORKS_API_KEY", None)
 38 |         if not api_key:
 39 |             raise ValueError("FIREWORKS_API_KEY is not set")
 40 |         client = OpenAI(
 41 |             api_key=api_key,
 42 |             base_url="https://api.fireworks.ai/inference/v1",
 43 |         )
 44 |     elif provider == "ollama":
 45 |         client = OpenAI(
 46 |             api_key="ollama",
 47 |             base_url="http://localhost:11434/v1/",
 48 |         )
 49 |         client.models.list()  # check if ollama is running, otherwise raise an error
 50 |     else:
 51 |         raise ValueError(f"Invalid provider: {provider}")
 52 | 
 53 |     return instructor.from_openai(
 54 |         client,
 55 |         mode=instructor.Mode.MD_JSON,
 56 |     )
 57 | 
 58 | 
 59 | def process_sample(client, sample: HubSample, model: str):
 60 |     return client.chat.completions.create(
 61 |         model=model,
 62 |         messages=[
 63 |             {
 64 |                 "role": "user",
 65 |                 "content": [
 66 |                     {"type": "text", "text": sample.prompt},
 67 |                     *[
 68 |                         {"type": "image_url", "image_url": {"url": encode_image(img, format="JPEG")}}
 69 |                         for img in sample.images
 70 |                     ],
 71 |                 ],
 72 |             },
 73 |         ],
 74 |         response_model=sample.response_model,
 75 |         temperature=0,
 76 |         max_retries=0,
 77 |     )
 78 | 
 79 | 
# (provider, model) pairs used to parametrize the benchmark test in this file.
# Only the uncommented entries run; the commented ones are kept as ready-made
# alternatives that can be switched on by uncommenting them.
PROVIDER_MODELS = [
    ("openai", "gpt-4o-mini-2024-07-18"),
    # ("openai", "gpt-4o-2024-08-06"),
    # ("openai", "gpt-4o-2024-11-20"),
    # ("openai", "o1-2024-12-17"),
    # ("openai", "o1-mini-2024-09-12"),
    # ("openai", "o3-mini-2025-01-31"),
    # ("gemini", "gemini-2.0-flash-exp"),
    # ("fireworks", "accounts/fireworks/models/llama-v3p2-11b-vision-instruct"),
    # ("ollama", "llama3.2-vision:11b"),
    # ("ollama", "bsahane/Qwen2.5-VL-7B-Instruct:Q4_K_M_benxh"),
]
 92 | 
 93 | 
 94 | def test_instructor_hub_sample(provider_arg: str, model_arg: str, domain_arg: str):
 95 |     from rich import print
 96 | 
 97 |     provider, model, domain = provider_arg, model_arg, domain_arg
 98 | 
 99 |     # Get the client (based on provider)
100 |     try:
101 |         instructor_client = get_instructor_client(provider)
102 |     except Exception as e:
103 |         pytest.skip(f"Error getting instructor client: {e}")
104 | 
105 |     logger.debug(f"Testing provider={provider}, model={model}, domain={domain}")
106 |     sample = VLMRUN_HUB_DATASET[domain]
107 |     logger.debug(f"Testing domain={sample.domain}, sample={sample}")
108 |     logger.debug(f"sample.images={sample.images}")
109 |     response = process_sample(instructor_client, sample, model=model)
110 |     print(response.model_dump_json(indent=2))
111 |     assert response is not None
112 | 
113 | 
@pytest.mark.benchmark
@pytest.mark.parametrize("provider_model", PROVIDER_MODELS)
def test_instructor_hub_dataset(provider_model: tuple[str, str]):
    """Benchmark every hub sample against one (provider, model) pair.

    Each sample is processed independently: a failure is logged and recorded
    with a ``None`` response instead of aborting the run. All results are then
    handed to ``create_benchmark``. The test is skipped when the client cannot
    be created.
    """
    provider, model = provider_model

    # Get the client (based on provider); any failure here skips the test.
    try:
        instructor_client = get_instructor_client(provider)
    except Exception as e:
        pytest.skip(f"Error getting instructor client: {e}")

    results = []
    for sample in VLMRUN_HUB_DATASET.values():
        logger.debug(f"Testing domain={sample.domain}, sample={sample}")
        logger.debug(f"sample.images={sample.images}")

        # A failing sample is recorded as "no response" so the benchmark keeps going.
        response = None
        try:
            response = process_sample(instructor_client, sample, model=model)
        except Exception as e:
            logger.error(f"Error processing sample {sample.domain}: {e}")

        record = BenchmarkResult(
            domain=sample.domain,
            sample=sample.data,
            response_model=sample.response_model.__name__,
            response_json=response.model_dump_json(indent=2, exclude_none=False) if response else None,
        )
        results.append(record)

        if response:
            logger.debug(response.model_dump_json(indent=2))

    create_benchmark(results, model, suffix="instructor")
150 | 


--------------------------------------------------------------------------------
/vlmrun/hub/utils.py:
--------------------------------------------------------------------------------
  1 | import importlib
  2 | import json
  3 | import sys
  4 | from functools import lru_cache
  5 | from pathlib import Path
  6 | from tempfile import TemporaryDirectory
  7 | from typing import Any, Dict, List, Tuple, Type, Union, get_args, get_origin
  8 | 
  9 | from datamodel_code_generator import DataModelType, InputFileType, generate
 10 | from pydantic import BaseModel, create_model
 11 | from typing_extensions import TypeAlias
 12 | 
# A response format is a pydantic model class (not an instance).
ResponseFormat: TypeAlias = Type[BaseModel]
# A field annotation: either a plain class or any other typing construct.
AnnotationType: TypeAlias = Union[Type, Any]
 15 | 
 16 | # Regex patterns that use look-around constructs unsupported by pydantic-core's Rust regex engine
 17 | UNSUPPORTED_LOOKAROUND_TOKENS = ("(?=", "(?!", "(?<=", "(?<!")
 18 | 
 19 | 
 20 | def _strip_unsupported_patterns(node: Any) -> Any:
 21 |     """Recursively strip regex patterns with unsupported look-around constructs from JSON schema."""
 22 |     if isinstance(node, dict):
 23 |         pattern = node.get("pattern")
 24 |         if isinstance(pattern, str) and any(tok in pattern for tok in UNSUPPORTED_LOOKAROUND_TOKENS):
 25 |             node = dict(node)  # copy so we don't mutate callers' dicts
 26 |             node.pop("pattern", None)
 27 |         for key, value in list(node.items()):
 28 |             node[key] = _strip_unsupported_patterns(value)
 29 |         return node
 30 |     elif isinstance(node, list):
 31 |         return [_strip_unsupported_patterns(item) for item in node]
 32 |     return node
 33 | 
 34 | 
def sanitize_json_schema_for_pydantic_core(schema: Dict[str, Any]) -> Dict[str, Any]:
    """Sanitize JSON schema by removing regex patterns unsupported by pydantic-core.

    pydantic-core uses a Rust regex engine that doesn't support look-around constructs
    (look-ahead and look-behind). This function removes such patterns to allow model
    generation to succeed.

    Args:
        schema: JSON schema to sanitize.

    Returns:
        The result of ``_strip_unsupported_patterns(schema)``.
    """
    # Thin public wrapper: all the work happens in the recursive helper.
    return _strip_unsupported_patterns(schema)
 43 | 
 44 | 
 45 | def patch_response_format(response_format: ResponseFormat) -> ResponseFormat:
 46 |     """Patch the OpenAI response format to handle Pydantic models, including nested models.
 47 | 
 48 |     The following fields are not supported by OpenAI:
 49 |     - date
 50 |     - datetime
 51 |     - time
 52 |     - timedelta
 53 | 
 54 |     This function patches the response format to handle these fields. We convert them to strings and
 55 |     then convert them back to the original type.
 56 |     """
 57 |     from datetime import date, datetime, time, timedelta
 58 | 
 59 |     def patch_pydantic_field_annotation(annotation: AnnotationType) -> AnnotationType:
 60 |         if annotation in [date, datetime, time, timedelta]:
 61 |             return str
 62 |         elif get_origin(annotation) is Union:
 63 |             return Union[tuple([patch_pydantic_field_annotation(a) for a in get_args(annotation)])]
 64 |         elif get_origin(annotation) is List:
 65 |             return List[patch_pydantic_field_annotation(get_args(annotation)[0])]
 66 |         elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
 67 |             return patch_pydantic_model(annotation)
 68 |         else:
 69 |             return annotation
 70 | 
 71 |     def patch_pydantic_model(model: Type[BaseModel]) -> Type[BaseModel]:
 72 |         # Copy the fields from the base class
 73 |         fields = model.model_fields.copy()
 74 |         new_fields: Dict[str, Tuple[AnnotationType, Any]] = {
 75 |             field_name: (patch_pydantic_field_annotation(field.annotation), field)
 76 |             for field_name, field in fields.items()
 77 |         }
 78 |         # Create a new model with the subset of fields
 79 |         return create_model(f"{model.__name__}_patched", __base__=BaseModel, **new_fields)
 80 | 
 81 |     return patch_pydantic_model(response_format)
 82 | 
 83 | 
 84 | def jsonschema_to_model(schema: Dict) -> Type[BaseModel]:
 85 |     """Generate a Pydantic Model from a json schema.
 86 | 
 87 |     Args:
 88 |     schema: Source json schema to create Pydantic model from
 89 | 
 90 |     Returns:
 91 |     The newly created and loaded Pydantic class
 92 |     """
 93 |     class_name = schema.get("title", "Model")
 94 |     # Sanitize the schema to remove regex patterns unsupported by pydantic-core
 95 |     sanitized_schema = sanitize_json_schema_for_pydantic_core(schema)
 96 |     json_schema = json.dumps(sanitized_schema)
 97 |     model = jsonschemastr_to_model(json_schema, class_name)
 98 |     return model
 99 | 
100 | 
@lru_cache(maxsize=16)
def jsonschemastr_to_model(json_schema: str, class_name: str) -> Type[BaseModel]:
    """Generate a Pydantic Model from a json schema string.

    Note (spillai): We use this to cache the generated models to avoid recompiling them.
    Both arguments are strings so that they can serve as the ``lru_cache`` key.

    Args:
        json_schema: Source json schema (serialized as a string) to create the Pydantic model from
        class_name: Name of the generated class to load from the generated module

    Returns:
        The newly created and loaded Pydantic class

    Raises:
        ImportError: If no import spec/loader can be created for the generated file.
    """
    # A bare `import importlib` does not guarantee that the `util` submodule is
    # loaded; import it explicitly instead of relying on another module having done so.
    import importlib.util

    # Ref: https://github.com/koxudaxi/datamodel-code-generator/issues/278
    with TemporaryDirectory() as tmp_dirname:
        tmp_path = Path(tmp_dirname) / "tempmodel.py"
        generate(
            json_schema,
            input_file_type=InputFileType.JsonSchema,
            class_name=class_name,
            output=tmp_path,
            output_model_type=DataModelType.PydanticV2BaseModel,
        )
        spec = importlib.util.spec_from_file_location("models", str(tmp_path))
        if spec and spec.loader:
            module = importlib.util.module_from_spec(spec)
            # NOTE(review): every generated module is registered under the same
            # name ("models"), so each call replaces the previous entry.
            sys.modules[spec.name] = module
            spec.loader.exec_module(module)
            return getattr(module, class_name)
        raise ImportError("Failed to import generated model")  # pragma: no cover
131 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/finance/balance_sheet.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from datetime import date
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
# Header metadata of a balance sheet: who reports, for which date/period, in
# which currency and units, and audit details. All fields are optional.
class HeaderInformation(BaseModel):
    company_name: Optional[str] = Field(None, description="Name of the company")
    report_title: Optional[str] = Field(None, description="Title of the report (e.g., 'Consolidated Balance Sheet')")
    reporting_date: Optional[date] = Field(None, description="Date of the balance sheet")
    reporting_period: Optional[str] = Field(None, description="Period covered by the report (e.g., 'December 31, 2023')")
    currency: Optional[str] = Field(None, description="Currency used in the report (e.g., 'USD', 'EUR')")
    units: Optional[str] = Field(None, description="Units of measurement (e.g., 'millions', 'thousands')")
    accounting_standard: Optional[str] = Field(None, description="Accounting standard used (e.g., 'IFRS', 'GAAP')")
    is_consolidated: Optional[bool] = Field(None, description="Whether this is a consolidated statement")
    auditor: Optional[str] = Field(None, description="Name of the auditing firm")
    audit_opinion: Optional[str] = Field(None, description="Type of audit opinion (e.g., 'Unqualified', 'Qualified')")
16 |     audit_opinion: Optional[str] = Field(None, description="Type of audit opinion (e.g., 'Unqualified', 'Qualified')")
17 | 
18 | 
# Current-asset line items plus their reported total. The total is a separate
# extracted field; nothing in this model computes or validates it.
class CurrentAssets(BaseModel):
    cash_and_equivalents: Optional[float] = Field(None, description="Cash and highly liquid assets convertible to cash within 90 days")
    marketable_securities: Optional[float] = Field(None, description="Short-term investments that can be readily converted to cash")
    accounts_receivable: Optional[float] = Field(None, description="Money owed to the company by customers for goods/services delivered")
    inventories: Optional[float] = Field(None, description="Raw materials, work-in-progress, and finished goods held for sale")
    prepaid_expenses: Optional[float] = Field(None, description="Expenses paid in advance that haven't yet been incurred")
    other_current_assets: Optional[float] = Field(None, description="Any other assets expected to be converted to cash within one year")
    total_current_assets: Optional[float] = Field(None, description="Sum of all current assets")
27 | 
28 | 
# Non-current asset line items plus their reported total (not computed here).
class NonCurrentAssets(BaseModel):
    property_plant_equipment: Optional[float] = Field(None, description="Net value of physical assets like buildings, machinery, and equipment")
    intangible_assets: Optional[float] = Field(None, description="Non-physical assets like patents, trademarks, goodwill, and software")
    long_term_investments: Optional[float] = Field(None, description="Investments intended to be held for more than one year")
    other_non_current_assets: Optional[float] = Field(None, description="Any other assets not expected to be converted to cash within one year")
    total_non_current_assets: Optional[float] = Field(None, description="Sum of all non-current assets")
35 | 
36 | 
# Asset side of the balance sheet: the current and non-current groups declared
# above, plus the reported grand total.
class Assets(BaseModel):
    current_assets: Optional[CurrentAssets] = Field(None, description="Assets expected to be converted to cash within one year")
    non_current_assets: Optional[NonCurrentAssets] = Field(None, description="Assets expected to provide economic benefits beyond one year")
    total_assets: Optional[float] = Field(None, description="Sum of all assets (current and non-current)")
41 | 
42 | 
# Current-liability line items plus their reported total (not computed here).
class CurrentLiabilities(BaseModel):
    accounts_payable: Optional[float] = Field(None, description="Money owed to suppliers for goods/services received")
    short_term_debt: Optional[float] = Field(None, description="Debt due within one year, including current portion of long-term debt")
    accrued_expenses: Optional[float] = Field(None, description="Expenses recognized but not yet paid")
    income_taxes_payable: Optional[float] = Field(None, description="Taxes owed but not yet paid")
    deposits: Optional[float] = Field(None, description="Customer deposits or other funds held temporarily")
    other_current_liabilities: Optional[float] = Field(None, description="Any other obligations due within one year")
    total_current_liabilities: Optional[float] = Field(None, description="Sum of all current liabilities")
51 | 
52 | 
class NonCurrentLiabilities(BaseModel):
    # Non-current liability line items. Each amount is an optional float
    # that defaults to None when it is not supplied.
    long_term_debt: Optional[float] = Field(
        default=None,
        description="Debt obligations due beyond one year",
    )
    lease_liabilities: Optional[float] = Field(
        default=None,
        description="Long-term lease obligations",
    )
    other_non_current_liabilities: Optional[float] = Field(
        default=None,
        description="Any other obligations due beyond one year",
    )
    total_non_current_liabilities: Optional[float] = Field(
        default=None,
        description="Sum of all non-current liabilities",
    )
58 | 
59 | 
class Liabilities(BaseModel):
    # Groups the two liability sub-models with an overall total.
    # All three fields are optional and default to None.
    current_liabilities: Optional[CurrentLiabilities] = Field(
        default=None,
        description="Obligations due within one year",
    )
    non_current_liabilities: Optional[NonCurrentLiabilities] = Field(
        default=None,
        description="Obligations due beyond one year",
    )
    total_liabilities: Optional[float] = Field(
        default=None,
        description="Sum of all liabilities (current and non-current)",
    )
64 | 
65 | 
class ShareholdersEquity(BaseModel):
    # Equity line items. Each amount is an optional float that defaults to
    # None when it is not supplied.
    common_stock: Optional[float] = Field(
        default=None,
        description="Par value of issued common shares",
    )
    preferred_stock: Optional[float] = Field(
        default=None,
        description="Par value of issued preferred shares",
    )
    additional_paid_in_capital: Optional[float] = Field(
        default=None,
        description="Amount paid by shareholders above par value",
    )
    retained_earnings: Optional[float] = Field(
        default=None,
        description="Accumulated profits not distributed to shareholders",
    )
    total_equity: Optional[float] = Field(
        default=None,
        description="Sum of all shareholders' equity components",
    )
72 | 
73 | 
class BalanceSheet(BaseModel):
    # Top-level model: a header plus the assets, liabilities and equity
    # sub-models. Every section is optional and defaults to None.
    header: Optional[HeaderInformation] = Field(
        default=None,
        description="General information about the company and report",
    )
    assets: Optional[Assets] = Field(
        default=None,
        description="Resources owned or controlled by the company",
    )
    liabilities: Optional[Liabilities] = Field(
        default=None,
        description="Obligations and debts owed by the company",
    )
    equity: Optional[ShareholdersEquity] = Field(
        default=None,
        description="Residual interest in the assets after deducting liabilities",
    )
79 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/resume.py:
--------------------------------------------------------------------------------
  1 | from datetime import date
  2 | from typing import Dict, List, Optional
  3 | 
  4 | from pydantic import BaseModel, Field, HttpUrl
  5 | 
  6 | 
  7 | class ContactInfo(BaseModel):
  8 |     full_name: str = Field(..., description="Full name of the individual.")
  9 |     email: Optional[str] = Field(None, description="Email address.")
 10 |     phone: Optional[str] = Field(None, description="Phone number.")
 11 |     address: Optional[str] = Field(None, description="Physical address.")
 12 |     linkedin: Optional[HttpUrl] = Field(None, description="LinkedIn profile URL.")
 13 |     github: Optional[HttpUrl] = Field(None, description="GitHub profile URL.")
 14 |     portfolio: Optional[HttpUrl] = Field(None, description="Portfolio website URL.")
 15 |     google_scholar: Optional[HttpUrl] = Field(None, description="Google Scholar profile URL.")
 16 | 
 17 | 
 18 | class Education(BaseModel):
 19 |     institution: Optional[str] = Field(None, description="Name of the educational institution.")
 20 |     degree: Optional[str] = Field(None, description="Degree obtained or pursued.")
 21 |     field_of_study: Optional[str] = Field(None, description="Field of study or major.")
 22 |     graduation_date: Optional[date] = Field(None, description="Date of graduation.")
 23 |     gpa: Optional[float] = Field(None, description="Grade Point Average.")
 24 |     honors: Optional[List[str]] = Field(None, description="Honors or awards received.")
 25 |     relevant_courses: Optional[List[str]] = Field(None, description="Relevant courses taken.")
 26 | 
 27 | 
 28 | class WorkExperience(BaseModel):
 29 |     company: Optional[str] = Field(None, description="Name of the company.")
 30 |     position: Optional[str] = Field(None, description="Job title or position held.")
 31 |     start_date: Optional[date] = Field(None, description="Start date of employment.")
 32 |     end_date: Optional[date] = Field(None, description="End date of employment.")
 33 |     is_current: bool = Field(False, description="Indicates if this is the current job.")
 34 |     responsibilities: List[str] = Field(..., description="Key responsibilities and achievements.")
 35 |     technologies: Optional[List[str]] = Field(None, description="Technologies or tools used.")
 36 | 
 37 | 
 38 | class Skill(BaseModel):
 39 |     name: str = Field(..., description="Name of the skill.")
 40 |     level: Optional[str] = Field(None, description="Proficiency level (e.g., 'Beginner', 'Intermediate', 'Expert')")
 41 |     years_of_experience: Optional[float] = Field(None, description="Years of experience with this skill.")
 42 | 
 43 | 
 44 | class TechnicalSkills(BaseModel):
 45 |     programming_languages: List[Skill] = Field(..., description="Programming languages.")
 46 |     frameworks_libraries: List[Skill] = Field(..., description="Frameworks and libraries.")
 47 |     databases: Optional[List[Skill]] = Field(None, description="Database technologies.")
 48 |     tools: Optional[List[Skill]] = Field(None, description="Development tools and environments.")
 49 |     cloud_platforms: Optional[List[Skill]] = Field(None, description="Cloud platforms and services.")
 50 |     other: Optional[List[Skill]] = Field(None, description="Other technical skills.")
 51 | 
 52 | 
 53 | class Project(BaseModel):
 54 |     name: str = Field(..., description="Name of the project.")
 55 |     description: Optional[str] = Field(None, description="Brief description of the project.")
 56 |     technologies: Optional[List[str]] = Field(None, description="Technologies or tools used.")
 57 |     url: Optional[HttpUrl] = Field(None, description="URL to the project or its repository.")
 58 |     github_url: Optional[HttpUrl] = Field(None, description="GitHub repository URL.")
 59 |     start_date: Optional[date] = Field(None, description="Start date of the project.")
 60 |     end_date: Optional[date] = Field(None, description="End date of the project.")
 61 |     role: Optional[str] = Field(None, description="Role in the project.")
 62 |     key_achievements: Optional[List[str]] = Field(None, description="Key achievements or features implemented.")
 63 | 
 64 | 
 65 | class Certification(BaseModel):
 66 |     name: str = Field(..., description="Name of the certification.")
 67 |     issuer: str = Field(..., description="Organization that issued the certification.")
 68 |     date_obtained: Optional[date] = Field(None, description="Date the certification was obtained.")
 69 |     expiration_date: Optional[date] = Field(None, description="Expiration date of the certification.")
 70 |     credential_id: Optional[str] = Field(None, description="Credential ID or verification URL.")
 71 | 
 72 | 
 73 | class OpenSourceContribution(BaseModel):
 74 |     project_name: str = Field(..., description="Name of the open-source project.")
 75 |     contribution_type: str = Field(
 76 |         ..., description="Type of contribution (e.g., 'Bug fix', 'Feature', 'Documentation')"
 77 |     )
 78 |     description: str = Field(..., description="Brief description of the contribution.")
 79 |     url: Optional[HttpUrl] = Field(None, description="URL to the contribution (e.g., pull request).")
 80 | 
 81 | 
 82 | class Resume(BaseModel):
 83 |     contact_info: ContactInfo = Field(..., description="Contact information of the individual.")
 84 |     summary: Optional[str] = Field(None, description="Professional summary or objective statement.")
 85 |     education: List[Education] = Field(..., description="Educational background.")
 86 |     work_experience: List[WorkExperience] = Field(..., description="Work experience.")
 87 |     technical_skills: TechnicalSkills = Field(..., description="Technical skills.")
 88 |     projects: Optional[List[Project]] = Field(None, description="Notable projects")
 89 |     open_source_contributions: Optional[List[OpenSourceContribution]] = Field(
 90 |         None, description="Open source contributions."
 91 |     )
 92 |     certifications: Optional[List[Certification]] = Field(None, description="Professional certifications.")
 93 |     publications: Optional[List[str]] = Field(None, description="Publications or technical writing.")
 94 |     conferences: Optional[List[str]] = Field(None, description="Conferences attended or presented at.")
 95 |     languages: Optional[List[Skill]] = Field(None, description="Languages known (natural languages).")
 96 |     volunteer_work: Optional[List[str]] = Field(None, description="Volunteer work or community service.")
 97 |     interests: Optional[List[str]] = Field(None, description="Personal interests or hobbies.")
 98 |     references: Optional[str] = Field(None, description="References or note about references.")
 99 |     additional_sections: Optional[Dict[str, List[str]]] = Field(
100 |         None, description="Any additional sections in the resume."
101 |     )
102 | 


--------------------------------------------------------------------------------
/tests/benchmarks/2025-01-10-llama3.2-vision-11b-instructor-results.md:
--------------------------------------------------------------------------------
 1 | ## Benchmark Results (model=llama3.2-vision:11b, date=2025-01-10)
 2 | 
 3 | <table>
 4 | <tr>
 5 | <td style='width: 5%;'> Domain </td>
 6 | <td style='width: 5%;'> Response Model </td>
 7 | <td style='width: 40%;'> Sample </td>
 8 | <td style='width: 50%;'> Response JSON </td>
 9 | </tr>
10 |     <tr><td> <kbd>document.invoice</kbd> </td>
11 | <td> <kbd>Invoice</kbd> </td>
12 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg' width='100%' /> </td>
13 | <td> <pre>{<br>  "invoice_id": null,<br>  "period_start": null,<br>  "period_end": null,<br>  "invoice_issue_date": null,<br>  "invoice_due_date": null,<br>  "order_id": null,<br>  "customer_id": null,<br>  "issuer": null,<br>  "issuer_address": null,<br>  "customer": null,<br>  "customer_email": null,<br>  "customer_phone": null,<br>  "customer_billing_address": null,<br>  "customer_shipping_address": null,<br>  "items": null,<br>  "subtotal": null,<br>  "tax": null,<br>  "total": null,<br>  "currency": null,<br>  "notes": null<br>}</pre> </td>
14 | </tr><tr><td> <kbd>document.receipt</kbd> </td>
15 | <td> <kbd>Receipt</kbd> </td>
16 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.receipt/sample_receipt.webp' width='100%' /> </td>
17 | <td> <pre>❌</pre> </td>
18 | </tr><tr><td> <kbd>document.resume</kbd> </td>
19 | <td> <kbd>Resume</kbd> </td>
20 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.resume/fake-resume.webp' width='100%' /> </td>
21 | <td> <pre>❌</pre> </td>
22 | </tr><tr><td> <kbd>document.us-drivers-license</kbd> </td>
23 | <td> <kbd>USDriversLicense</kbd> </td>
24 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.us-drivers-license/dl3.jpg' width='100%' /> </td>
25 | <td> <pre>❌</pre> </td>
26 | </tr><tr><td> <kbd>document.utility-bill</kbd> </td>
27 | <td> <kbd>UtilityBill</kbd> </td>
28 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.utility-bill/utility-bill-example.webp' width='100%' /> </td>
29 | <td> <pre>{<br>  "account_number": null,<br>  "date_mailed": null,<br>  "service_for": null,<br>  "service_address": {<br>    "street": null,<br>    "city": null,<br>    "state": null,<br>    "zip_code": null<br>  },<br>  "billing_period_start": null,<br>  "billing_period_end": null,<br>  "date_due": null,<br>  "amount_due": 88.14,<br>  "previous_balance": null,<br>  "payment_received": null,<br>  "current_charges": null,<br>  "breakdown_of_charges": [<br>    {<br>      "description": "Electricity",<br>      "amount": 50.0,<br>      "usage": null,<br>      "rate": null<br>    },<br>    {<br>      "description": "Water",<br>      "amount": 20.0,<br>      "usage": null,<br>      "rate": null<br>    }<br>  ],<br>  "payment_options": [<br>    "Check",<br>    "Online Payment"<br>  ],<br>  "contact_information": {<br>    "phone_number": "+1-800-123-4567"<br>  }<br>}</pre> </td>
30 | </tr><tr><td> <kbd>document.w2-form</kbd> </td>
31 | <td> <kbd>W2Form</kbd> </td>
32 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.w2-form/w2-form.jpg' width='100%' /> </td>
33 | <td> <pre>{<br>  "control_number": null,<br>  "ein": null,<br>  "ssn": null,<br>  "employee_name": "Anastasia Hodges",<br>  "employee_address": {<br>    "street": null,<br>    "city": null,<br>    "state": null,<br>    "zip_code": null<br>  },<br>  "employer_name": null,<br>  "employer_address": {<br>    "street": null,<br>    "city": null,<br>    "state": null,<br>    "zip_code": null<br>  },<br>  "wages_tips_other_compensation": 0.0,<br>  "federal_income_tax_withheld": 0.0,<br>  "social_security_wages": 0.0,<br>  "social_security_tax_withheld": 0.0,<br>  "medicare_wages_and_tips": 0.0,<br>  "medicare_tax_withheld": 0.0,<br>  "tax_year": null<br>}</pre> </td>
34 | </tr><tr><td> <kbd>aerospace.remote-sensing</kbd> </td>
35 | <td> <kbd>RemoteSensing</kbd> </td>
36 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/aerospace.remote-sensing/planet_labs_port.jpg' width='100%' /> </td>
37 | <td> <pre>❌</pre> </td>
38 | </tr><tr><td> <kbd>healthcare.medical-insurance-card</kbd> </td>
39 | <td> <kbd>MedicalInsuranceCard</kbd> </td>
40 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/healthcare.medical-insurance-card/blue_cross_example.jpg' width='100%' /> </td>
41 | <td> <pre>{<br>  "provider_service": {<br>    "provider_service_number": null,<br>    "precertification_number": null<br>  },<br>  "member_information": {<br>    "member_name": "John Doe",<br>    "member_id": "1234567890",<br>    "group_number": "ABC123"<br>  },<br>  "pharmacy_plan": {<br>    "rx_bin": null,<br>    "rx_pcn": null,<br>    "rx_grp": null,<br>    "pharmacy_help_desk": null<br>  },<br>  "insurance_provider": {<br>    "provider_name": "Blue Cross Blue Shield",<br>    "network": "PPO"<br>  },<br>  "coverage": {<br>    "office_visit": null,<br>    "specialist_visit": null,<br>    "urgent_care": null,<br>    "emergency_room": null,<br>    "inpatient_hospital": null<br>  }<br>}</pre> </td>
42 | </tr><tr><td> <kbd>retail.ecommerce-product-caption</kbd> </td>
43 | <td> <kbd>RetailEcommerceProductCaption</kbd> </td>
44 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/retail.ecommerce-product-caption/Electronics%20-%20Kindle.webp' width='100%' /> </td>
45 | <td> <pre>❌</pre> </td>
46 | </tr><tr><td> <kbd>media.tv-news</kbd> </td>
47 | <td> <kbd>TVNews</kbd> </td>
48 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.tv-news/bbc_news_ukraine_screenshot.jpg' width='100%' /> </td>
49 | <td> <pre>{<br>  "description": "Biden criticises Netanyahu in an interview",<br>  "chyron": null,<br>  "network": "CNN",<br>  "reporters": null<br>}</pre> </td>
50 | </tr><tr><td> <kbd>media.nfl-game-state</kbd> </td>
51 | <td> <kbd>NFLGameState</kbd> </td>
52 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png' width='100%' /> </td>
53 | <td> <pre>{<br>  "description": null,<br>  "teams": [<br>    {<br>      "name": "GB",<br>      "score": null<br>    },<br>    {<br>      "name": "AZ",<br>      "score": 7<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 2,<br>  "clock_time": "14:56",<br>  "possession_team": "GB",<br>  "down": "1st",<br>  "distance": null,<br>  "yard_line": null,<br>  "network": null,<br>  "is_shown": true<br>}</pre> </td>
54 | </tr><tr><td> <kbd>media.nba-game-state</kbd> </td>
55 | <td> <kbd>NBAGameState</kbd> </td>
56 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nba-game-state/heats_spurs.png' width='100%' /> </td>
57 | <td> <pre>{<br>  "description": null,<br>  "teams": [<br>    {<br>      "name": "MIA",<br>      "score": 7<br>    },<br>    {<br>      "name": "SA",<br>      "score": 6<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 1,<br>  "clock_time": "9:09",<br>  "shot_clock": null,<br>  "network": "ESPN",<br>  "is_shown": true<br>}</pre> </td>
58 | </tr>
59 | </table>
60 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/document/form_work_order.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from decimal import Decimal
  3 | from enum import Enum
  4 | from typing import Optional, List, Union
  5 | 
  6 | from pydantic import BaseModel, Field
  7 | 
  8 | 
  9 | class CompanyInfo(BaseModel):
 10 |     """Company information header"""
 11 |     name: Optional[str] = Field(None, description="Name of the company")
 12 |     address: Optional[str] = Field(None, description="Street address of the company")
 13 |     suite: Optional[str] = Field(None, description="Suite or unit number")
 14 |     city: Optional[str] = Field(None, description="City name")
 15 |     state: Optional[str] = Field(None, description="State abbreviation")
 16 |     zip_code: Optional[str] = Field(None, description="ZIP/Postal code")
 17 |     phone: Optional[str] = Field(None, description="Company phone number")
 18 |     email: Optional[str] = Field(None, description="Company email address")
 19 | 
 20 | 
class ServiceType(str, Enum):
    """Types of services offered"""
    # String-valued enum: the str mixin makes each member compare equal to
    # its display string. Members are grouped below into automotive,
    # landscaping and electrical services; the groups share one flat
    # namespace, so a value alone does not identify its category.

    # Automotive Services
    LUBRICATE = "Lubricate"
    CHANGE_OIL = "Change Oil"
    TRANSMISSION_SERVICE = "Transmission Service"
    BATTERY_SERVICE = "Battery Replacement / Check"
    DIFFERENTIAL_SERVICE = "Differential Service"
    FLAT_TIRE_REPAIR = "Flat Tire Repair"
    ADAS_CALIBRATION = "ADAS Calibration"
    WIPER_REPLACEMENT = "Wiper Replacement"
    CAR_WASH = "Car Wash"
    POLISH_DETAILING = "Polish / Detailing"
    TIRE_SERVICE = "Tire Rotation and Alignment"
    BRAKE_SERVICE = "Brake Inspection / Replacement"
    COOLANT_FLUSH = "Coolant System Flush"
    AIR_FILTER = "Air Filter Replacement"
    SPARK_PLUG = "Spark Plug Replacement"
    EXHAUST_CHECK = "Exhaust System Check"
    ELECTRONICS = "Electronics"
    SUSPENSION_CHECK = "Suspension and Steering Check"
    # Separate member from BATTERY_SERVICE above: this value names the
    # electric/hybrid vehicle battery service.
    BATTERY_CHECK = "Electric/Hybrid Vehicle Battery Svc"

    # Landscaping Services
    AERATION = "Aeration"
    BORDERS_EDGING = "Borders / Edging"
    FERTILIZATION = "Fertilization"
    GARDEN_DESIGN = "Garden Design"
    GARDEN_INSTALLATION = "Garden Installation"
    HEDGE_TRIMMING = "Hedge Trimming"
    IRRIGATION_INSTALL = "Irrigation Installation"
    IRRIGATION_MAINTENANCE = "Irrigation Maintenance"
    LANDSCAPE_LIGHTING = "Landscape Lighting Installation"
    MOWING = "Mowing"
    TRIM_MOWING = "Trim Mowing"
    MULCHING = "Mulching"
    PATIO_DECK_CONSTRUCTION = "Patio / Deck Construction"
    PATIO_DECK_MAINTENANCE = "Patio / Deck Maintenance"
    PAVING = "Paving / Walkway Installation"
    RETAINING_WALL = "Retaining Wall Construction"
    SEASONAL_CLEANUP = "Seasonal Clean-up"
    SOD_INSTALLATION = "Sod Installation"
    TREE_PRUNING = "Tree Pruning"
    TREE_TRIMMING = "Tree Trimming"
    WEED_CONTROL = "Weed Control"
    WINTERIZING = "Winterizing"

    # Electrical Services
    ELECTRICAL_REPAIR = "Electrical Repair"
    CIRCUIT_BREAKER = "Circuit Breaker Service"
    FAN_INSTALLATION = "Fan Installation"
    OUTLET_REPAIR = "Outlet Repair"
    WIRING_INSTALLATION = "Wiring Installation"
 74 | 
 75 | 
class PriorityLevel(str, Enum):
    """Priority levels for work orders"""
    # String-valued enum with three members; the str mixin makes each
    # member compare equal to its capitalised label.
    LOW = "Low"
    MEDIUM = "Medium"
    HIGH = "High"
 81 | 
 82 | 
 83 | class VehicleInfo(BaseModel):
 84 |     """Vehicle information for automotive work orders"""
 85 |     vin: Optional[str] = Field(None, description="Vehicle Identification Number")
 86 |     make_model: Optional[str] = Field(None, description="Make and model of the vehicle")
 87 |     year: Optional[int] = Field(None, description="Year of the vehicle")
 88 |     odometer: Optional[float] = Field(None, description="Current odometer reading")
 89 |     license_number: Optional[str] = Field(None, description="License plate number")
 90 |     state: Optional[str] = Field(None, description="State of registration")
 91 |     motor_number: Optional[str] = Field(None, description="Motor/Engine number")
 92 | 
 93 | 
 94 | class LineItem(BaseModel):
 95 |     """Line item for materials, parts, or labor"""
 96 |     description: Optional[str] = Field(None, description="Description of the item or service")
 97 |     quantity: Optional[Decimal] = Field(None, description="Quantity of the item")
 98 |     price_per_unit: Optional[Decimal] = Field(None, description="Price per unit")
 99 |     amount: Optional[Decimal] = Field(None, description="Total amount (quantity * price_per_unit)")
100 |     part_number: Optional[str] = Field(None, description="Part number if applicable")
101 | 
102 | 
class WorkOrder(BaseModel):
    """Unified schema for all types of work orders"""

    # All fields are optional. services_requested, materials and
    # labor_items default to a fresh empty list (default_factory=list);
    # every other field defaults to None.

    # --- Issuing company ---
    company_info: Optional[CompanyInfo] = Field(
        default=None,
        description="Company information",
    )

    # --- Order identification ---
    order_number: Optional[str] = Field(
        default=None,
        description="Work order number/identifier",
    )
    order_type: Optional[str] = Field(
        default=None,
        description="Type of work order (Automotive/Landscaping/Electrical)",
    )

    # --- Client ---
    client_name: Optional[str] = Field(
        default=None,
        description="Name of the client",
    )
    client_phone: Optional[str] = Field(
        default=None,
        description="Client's phone number",
    )
    client_email: Optional[str] = Field(
        default=None,
        description="Client's email address",
    )
    service_location: Optional[str] = Field(
        default=None,
        description="Address where service will be performed",
    )

    # --- Dates ---
    order_date: Optional[datetime] = Field(
        default=None,
        description="Date and time the order was created",
    )
    start_date: Optional[datetime] = Field(
        default=None,
        description="Expected start date",
    )
    end_date: Optional[datetime] = Field(
        default=None,
        description="Expected end date",
    )
    date_completed: Optional[datetime] = Field(
        default=None,
        description="Actual completion date",
    )

    # --- Vehicle (automotive work orders) ---
    vehicle_info: Optional[VehicleInfo] = Field(
        default=None,
        description="Vehicle information for automotive work orders",
    )

    # --- Requested work ---
    services_requested: Optional[List[ServiceType]] = Field(
        default_factory=list,
        description="List of services requested",
    )
    job_description: Optional[str] = Field(
        default=None,
        description="Detailed description of work to be performed",
    )
    priority_level: Optional[PriorityLevel] = Field(
        default=None,
        description="Priority level of the work order",
    )

    # --- Costs (Decimal amounts; none are derived by the model) ---
    materials: Optional[List[LineItem]] = Field(
        default_factory=list,
        description="List of materials used",
    )
    labor_items: Optional[List[LineItem]] = Field(
        default_factory=list,
        description="List of labor charges",
    )
    materials_total: Optional[Decimal] = Field(
        default=None,
        description="Total cost of materials",
    )
    labor_total: Optional[Decimal] = Field(
        default=None,
        description="Total cost of labor",
    )
    subtotal: Optional[Decimal] = Field(
        default=None,
        description="Subtotal before tax",
    )
    tax_rate: Optional[Decimal] = Field(
        default=None,
        description="Tax rate as a percentage",
    )
    tax_amount: Optional[Decimal] = Field(
        default=None,
        description="Calculated tax amount",
    )
    total_amount: Optional[Decimal] = Field(
        default=None,
        description="Final total amount",
    )
142 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/contrib/healthcare/pathology_report.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, datetime
  2 | from typing import List, Optional
  3 | 
  4 | from pydantic import BaseModel, Field
  5 | 
  6 | 
  7 | class Addendum(BaseModel):
  8 |     addendum_date: Optional[datetime] = Field(None, description="Date and time the addendum/amendment was created")
  9 |     reason: Optional[str] = Field(None, description="Reason for the addendum (e.g., corrected data, additional info)")
 10 |     text: Optional[str] = Field(None, description="Content or changes described in the addendum")
 11 | 
 12 | 
 13 | class PatientInformation(BaseModel):
 14 |     patient_name: Optional[str] = Field(None, description="Full name of the patient")
 15 |     patient_id: Optional[str] = Field(None, description="Medical record number or patient identifier")
 16 |     date_of_birth: Optional[date] = Field(None, description="Patient's date of birth")
 17 |     gender: Optional[str] = Field(None, description="Patient's gender")
 18 |     clinical_history: Optional[str] = Field(None, description="Relevant clinical history")
 19 | 
 20 | 
 21 | class FacilityInformation(BaseModel):
 22 |     facility_name: Optional[str] = Field(None, description="Name of the medical facility")
 23 |     facility_address: Optional[str] = Field(None, description="Facility address or contact information")
 24 | 
 25 | 
 26 | class GrossExamination(BaseModel):
 27 |     description: Optional[str] = Field(None, description="Narrative description of the specimen at gross examination")
 28 |     specimen_measurements: Optional[List[str]] = Field(
 29 |         None, description="List of measurement strings (e.g. '3 x 2 x 1 cm')"
 30 |     )
 31 |     specimen_weight: Optional[float] = Field(None, description="Weight in grams if relevant")
 32 |     gross_margins: Optional[str] = Field(None, description="Gross margin description (if applicable)")
 33 |     cassette_details: Optional[List[str]] = Field(
 34 |         None, description="Details of cassettes used, how tissue was sectioned"
 35 |     )
 36 |     gross_findings: Optional[List[str]] = Field(None, description="Key gross findings (e.g., necrosis, hemorrhage)")
 37 | 
 38 | 
 39 | class MicroscopicExamination(BaseModel):
 40 |     description: Optional[str] = Field(None, description="Detailed microscopic or histological findings")
 41 |     cellular_features: Optional[str] = Field(None, description="Cellular characteristics (atypia, pleomorphism, etc.)")
 42 |     architectural_features: Optional[str] = Field(None, description="Architecture (glandular, papillary, etc.)")
 43 |     microscopic_margins: Optional[str] = Field(None, description="Microscopic margin status or description")
 44 |     lymphovascular_invasion: Optional[bool] = Field(
 45 |         None, description="Presence/absence of lymphovascular invasion (LVI)"
 46 |     )
 47 |     perineural_invasion: Optional[bool] = Field(None, description="Presence/absence of perineural invasion (PNI)")
 48 |     additional_findings: Optional[List[str]] = Field(None, description="Other significant findings")
 49 | 
 50 | 
 51 | class SpecimenDiagnosis(BaseModel):
 52 |     diagnosis_text: Optional[str] = Field(None, description="Summary of the pathological diagnosis or interpretation")
 53 |     additional_notes: Optional[str] = Field(None, description="Any extra notes or classification details")
 54 | 
 55 | 
 56 | class ImmunohistochemistryResults(BaseModel):
 57 |     marker_name: Optional[str] = Field(None, description="Name of IHC marker (e.g., ER, PR, CD20)")
 58 |     result: Optional[str] = Field(None, description="Interpretation (Positive, Negative, etc.)")
 59 |     percentage: Optional[float] = Field(
 60 |         None, description="Approx. percentage of positive cells if known (value between 0.0 and 1.0)"
 61 |     )
 62 |     intensity: Optional[str] = Field(None, description="Intensity of staining (e.g., 1+, 2+, 3+)")
 63 |     pattern: Optional[str] = Field(None, description="Staining pattern (nuclear, cytoplasmic, membranous)")
 64 |     control_validity: Optional[bool] = Field(None, description="Whether control stain was valid")
 65 | 
 66 | 
 67 | class MolecularStudies(BaseModel):
 68 |     test_name: Optional[str] = Field(None, description="Name of molecular test (e.g., EGFR, KRAS, BRAF)")
 69 |     result: Optional[str] = Field(None, description="Result or interpretation (e.g., Mutated, Wild-type, Negative)")
 70 |     methodology: Optional[str] = Field(None, description="Method used (PCR, NGS, FISH, etc.)")
 71 |     interpretation: Optional[str] = Field(None, description="Clinical or pathological significance if known")
 72 | 
 73 | 
 74 | class SpecimenInformation(BaseModel):
 75 |     specimen_id: Optional[str] = Field(
 76 |         None, description="Unique identifier or label for the specimen (e.g., 'Specimen A')"
 77 |     )
 78 |     specimen_source: Optional[str] = Field(
 79 |         None, description="Combined anatomic site/type (e.g., 'Biopsy of right lung')"
 80 |     )
 81 |     collection_date: Optional[datetime] = Field(None, description="Date/time of specimen collection")
 82 |     received_date: Optional[datetime] = Field(None, description="Date/time specimen was received in lab")
 83 |     preservation: Optional[str] = Field(None, description="Preservation method if relevant (e.g., Formalin)")
 84 | 
 85 |     gross_examination: Optional[List[GrossExamination]] = Field(None, description="List of gross exam details")
 86 |     microscopic_examination: Optional[List[MicroscopicExamination]] = Field(
 87 |         None, description="List of microscopic exam details"
 88 |     )
 89 | 
 90 |     immunohistochemistry: Optional[List[ImmunohistochemistryResults]] = Field(
 91 |         None, description="Any IHC results for this specimen"
 92 |     )
 93 |     molecular_studies: Optional[List[MolecularStudies]] = Field(
 94 |         None, description="Any molecular tests on this specimen"
 95 |     )
 96 | 
 97 |     diagnosis: Optional[SpecimenDiagnosis] = Field(
 98 |         None, description="Diagnosis/interpretation specific to this specimen"
 99 |     )
100 | 
101 | 
class PathologyReport(BaseModel):
    # Top-level report model. Every field is optional and defaults to None.

    # --- Report metadata ---
    accession_number: Optional[str] = Field(
        default=None,
        description="Unique report identifier (accession)",
    )
    report_type: Optional[str] = Field(
        default=None,
        description="Type of laboratory report (e.g., 'Surgical Pathology', 'Cytology')",
    )
    report_date: Optional[datetime] = Field(
        default=None,
        description="Date/time report was generated or finalized",
    )
    report_status: Optional[str] = Field(
        default=None,
        description="Status of the report",
    )

    # --- Patient and facility ---
    patient: Optional[PatientInformation] = Field(
        default=None,
        description="Patient demographics (all optional)",
    )
    facility: Optional[FacilityInformation] = Field(
        default=None,
        description="Facility/lab info, simplified",
    )

    # --- Specimens ---
    specimens: Optional[List[SpecimenInformation]] = Field(
        default=None,
        description="List of all specimens examined in this report",
    )

    # --- Report-level narrative ---
    integrated_diagnosis: Optional[str] = Field(
        default=None,
        description="An overall or integrated interpretation across specimens",
    )
    clinical_notes: Optional[str] = Field(
        default=None,
        description="High-level clinical notes, if relevant",
    )
    comments: Optional[str] = Field(
        default=None,
        description="General comments, disclaimers, or remarks",
    )

    # --- Sign-off ---
    pathologist_name: Optional[str] = Field(
        default=None,
        description="Name of the reporting laboratory professional",
    )

    # --- Amendments ---
    addenda: Optional[List[Addendum]] = Field(
        default=None,
        description="Any amendments or addenda appended to this report",
    )
126 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/utility_bill.py:
--------------------------------------------------------------------------------
  1 | from datetime import date
  2 | from typing import Dict, List, Optional
  3 | 
  4 | from pydantic import BaseModel, Field
  5 | 
  6 | 
  7 | class ChargeDetail(BaseModel):  # One itemized charge line on a bill; every field is optional and defaults to None.
  8 |     description: Optional[str] = Field(None, description="Description of the specific charge or service.")
  9 |     amount: Optional[float] = Field(None, description="Amount charged for the specific service or item.")
 10 |     currency: Optional[str] = Field(None, description="3-letter currency code for the amount, if different from the main bill currency.")
 11 |     usage: Optional[str] = Field(None, description="Usage details, such as '31 kWh' or '10 CCF'.")  # free text: quantity and unit together
 12 |     rate: Optional[float] = Field(None, description="Rate per unit for the service or item.")
 13 |     period_start: Optional[date] = Field(None, description="Start date for this specific charge, if applicable.")
 14 |     period_end: Optional[date] = Field(None, description="End date for this specific charge, if applicable.")
 15 | 
 16 | 
 17 | class Address(BaseModel):  # Postal address shared by the provider, service, billing and remittance fields of UtilityBill.
 18 |     street: Optional[str] = Field(None, description="Street address, including apartment or suite number.")
 19 |     city: Optional[str] = Field(None, description="City of the address.")
 20 |     state: Optional[str] = Field(None, description="State, province, or region of the address.")
 21 |     zip_code: Optional[str] = Field(None, description="Postal or ZIP code of the address.")  # kept as str, not int
 22 |     country: Optional[str] = Field(None, description="Country of the address.")
 23 | 
 24 | 
 25 | class ProviderContactInfo(BaseModel):  # Contact channels for the utility provider; every field is optional.
 26 |     phone_numbers: Optional[List[str]] = Field(None, description="List of contact phone numbers for the provider.")
 27 |     email_addresses: Optional[List[str]] = Field(None, description="List of contact email addresses for the provider.")
 28 |     websites: Optional[List[str]] = Field(
 29 |         None, description="List of relevant websites for the provider (e.g., customer portal, payment page)."
 30 |     )
 31 |     customer_service_hours: Optional[str] = Field(None, description="Operating hours for customer service.")  # free text, not parsed
 32 | 
 33 | 
 34 | class UsageDetail(BaseModel):  # One usage figure for a named period; both fields are required (no default).
 35 |     period_description: str = Field(
 36 |         ..., description="Description of the period (e.g., 'Current Period', 'Last Period', 'Year Ago Period')."
 37 |     )
 38 |     usage_value: float = Field(..., description="The usage value for that period.")
 39 |     # The unit for usage_value is not repeated here; it is the `unit` field of the parent UsageSummary.
 40 | 
 41 | 
 42 | class UsageSummary(BaseModel):  # Usage for one service type; service_type, unit and details are required, the rest optional.
 43 |     service_type: str = Field(..., description="Type of service (e.g., 'Electric', 'Gas', 'Water').")
 44 |     unit: str = Field(..., description="Unit of measurement for the usage_value (e.g., 'kWh', 'Therms', 'Gallons').")  # applies to every entry in `details`
 45 |     details: List[UsageDetail] = Field(..., description="List of usage details for different periods or categories.")
 46 |     meter_number: Optional[str] = Field(None, description="Identifier for the meter associated with this usage.")
 47 |     reading_type: Optional[str] = Field(None, description="Type of reading (e.g., 'Actual', 'Estimated').")
 48 | 
 49 | 
 50 | class UtilityBill(BaseModel):
 51 |     # Provider Information
 52 |     provider_name: Optional[str] = Field(None, description="Name of the utility provider .")
 53 |     provider_address: Optional[Address] = Field(None, description="Address of the utility provider.")
 54 |     provider_logo_description: Optional[str] = Field(None, description="Textual description of the provider's logo if present.")
 55 |     contact_information: Optional[ProviderContactInfo] = Field(
 56 |         None, description="Contact details for the utility provider."
 57 |     )
 58 | 
 59 |     # Account and Statement Information
 60 |     account_number: Optional[str] = Field(None, description="The unique identifier for the utility account.")
 61 |     statement_date: Optional[date] = Field(None, description="The date the bill or statement was issued (previously date_mailed).")
 62 |     statement_title: Optional[str] = Field(None, description="Title of the statement (e.g., 'ENERGY STATEMENT', 'Water Bill').")
 63 |     invoice_number: Optional[str] = Field(None, description="Invoice or bill number, if different from account number.") # Added for completeness
 64 | 
 65 |     # Customer Information
 66 |     service_for: Optional[str] = Field(None, description="Name of the entity or person the service is billed to.")
 67 |     service_address: Optional[Address] = Field(None, description="The address where the utility services are provided.")
 68 |     billing_address: Optional[Address] = Field(None, description="The mailing address for the bill, if different from service address.") # Added for completeness
 69 | 
 70 |     # Billing Period and Due Dates
 71 |     billing_period_start: Optional[date] = Field(None, description="The start date of the billing period covered by this bill.")
 72 |     billing_period_end: Optional[date] = Field(None, description="The end date of the billing period covered by this bill.")
 73 |     date_due: Optional[date] = Field(None, description="The due date for bill payment.")
 74 | 
 75 |     # Financial Summary
 76 |     currency: Optional[str] = Field(None, description="3-letter currency code for amounts on the bill (e.g., USD, CAD, EUR).")
 77 |     amount_due: Optional[float] = Field(None, description="The total amount payable by the due date.")
 78 |     previous_balance: Optional[float] = Field(None, description="The balance carried over from the previous billing cycle.")
 79 |     previous_unpaid_balance: Optional[float] = Field(
 80 |         None, description="Balance remaining from previous periods after payments were applied."
 81 |     )
 82 |     payment_received: Optional[float] = Field(None, description="Total payments received and applied since the last bill.")
 83 |     adjustments_credits: Optional[float] = Field(None, description="Total adjustments or credits applied during this billing period.") # Added for completeness
 84 |     current_charges: Optional[float] = Field(None, description="Total new charges for the current billing cycle.")
 85 |     
 86 |     # Charges Breakdown
 87 |     breakdown_of_charges: Optional[List[ChargeDetail]] = Field(
 88 |         None, description="Itemized list of charges, services, taxes, and fees with descriptions and amounts."
 89 |     )
 90 | 
 91 |     # Usage Information
 92 |     usage_summaries: Optional[List[UsageSummary]] = Field(
 93 |         None, description="Summaries of utility usage, possibly comparing different periods or types of service."
 94 |     )
 95 | 
 96 |     # Payment Information
 97 |     payment_options: Optional[List[str]] = Field(
 98 |         None, description="Accepted methods for bill payment (e.g., 'Online at provider.com/pay', 'Mail-in check')."
 99 |     )
100 |     payment_instructions: Optional[str] = Field(
101 |         None, description="Specific instructions for making a payment (e.g., 'Return this portion with your payment')."
102 |     )
103 |     payment_remittance_address: Optional[Address] = Field(None, description="Address to mail payments to.")
104 | 
105 |     # Miscellaneous
106 |     important_messages: Optional[List[str]] = Field(
107 |         None, description="List of important messages, announcements, or regulatory notices on the bill."
108 |     )
109 |     page_information: Optional[str] = Field(
110 |         None, description="Page numbering or other page-specific information (e.g., 'Page 1 of 2')."
111 |     )
112 |     notes: Optional[str] = Field(None, description="General notes or miscellaneous information on the bill.") # Added for completeness
113 | 


--------------------------------------------------------------------------------
/docs/SCHEMA-GUIDELINES.md:
--------------------------------------------------------------------------------
  1 | # Schema Guidelines
  2 | 
  3 | Thank you for contributing to the VLM Run Hub! To maintain consistency and adhere to industry best practices, please follow these guidelines when creating a new schema.
  4 | 
  5 | 
  6 | ## ✏️ Guidelines for Writing a Schema
  7 | 
  8 | 1. **Use Pydantic’s BaseModel**: All schemas must inherit from Pydantic’s `BaseModel`.
  9 |    ```python
 10 |    from pydantic import BaseModel
 11 | 
 12 |    class ExampleSchema(BaseModel):
 13 |       ...
 14 |    ```
 15 | 
 16 | 2. **Strongly-Typed Fields**: Define each field with precise, strongly-typed annotations (e.g., `str`, `int`, `float`, `list`, `dict`).
 17 | 
 18 | 3. **Optional Fields**: Use `Optional[...]` to define optional fields. This is critical as some fields may not be present in the document, and we want to make sure that Pydantic data validation does not fail when the JSON returned does not contain the relevant key.
 19 | 
 20 |    Example:
 21 |    ```python
 22 |    class CustomerInvoice(BaseModel):
 23 |       invoice_id: str = Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.")
 24 |       ...
 25 |       invoice_email: Optional[str] = Field(None, description="The email address of the customer, typically represented as a string of alphanumeric characters.")
 26 |    ```
 27 |    In the example above, the `invoice_email` field is optional as it may or may not be present in the input document. If it is not present in the JSON, the Pydantic model will not fail the schema validation. If the field is present in the JSON, the Pydantic model will validate the field against the schema.
 28 | 
 29 | 4. **Descriptive Field Names**: Use clear, descriptive, `snake_case` field names, each with a short `description` that explains the field's purpose. This is critical because the model relies on the field name and description to decide which content in the document maps to each field.
 30 | 
 31 |    Good example:
 32 |    ```python
 33 |    class CustomerInvoice(BaseModel):
 34 |       invoice_number: str = Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.")
 35 |    ```
 36 | 
 37 |    Bad example:
 38 |    ```python
 39 |    class CustomerInvoice(BaseModel):
 40 |       invoice_number: str = Field(..., description="The invoice number.")
 41 |    ```
 42 | 
 43 | 5. **Field Metadata**:
 44 |    - Use the `Field` class to provide:
 45 |      - `default`: If applicable (e.g., `Field(None, ...)`).
 46 |      - `description`: Include a short, clear explanation of the field’s purpose. (e.g., `Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.")`)
 47 |      - Other constraints: For validation (e.g., `max_length`, `regex`).
 48 |      - Validation: Add custom validators where necessary to enforce domain-specific rules.
 49 | 
 50 | 6. **Nested Models**: Use nested Pydantic models for complex structures (e.g., lists of dictionaries).
 51 | 
 52 |    ```python
 53 |    class CustomerInvoice(BaseModel):
 54 |       invoice_number: str = Field(..., description="The invoice number, typically represented as a string of alphanumeric characters.")
 55 |       items: list[Item] = Field(..., description="A list of items in the invoice.")
 56 |    ```
 57 | 
 58 | 7. **Enums**: Use enums or `Literal` for fixed choices.
 59 | 
 60 |    Using `Enum`:
 61 |    ```python
 62 |    class Status(Enum):
 63 |       pending = "pending"
 64 |       paid = "paid"
 65 |       cancelled = "cancelled"
 66 | 
 67 |    class CustomerInvoice(BaseModel):
 68 |       ...
 69 |       status: Status = Field(..., description="The status of the invoice, which can be either 'pending', 'paid', or 'cancelled'.")
 70 |    ```
 71 | 
 72 |    Using `Literal`:
 73 |    ```python
 74 |    class CustomerInvoice(BaseModel):
 75 |       status: Literal["pending", "paid", "cancelled"] = Field(..., description="The status of the invoice, which can be either 'pending', 'paid', or 'cancelled'.")
 76 |    ```
 77 | 
 78 | ### ✅ Schema Review Checklist
 79 | 
 80 | Before submitting your schema:
 81 | 
 82 | - [ ] **Field Types**: Ensure all fields are strongly-typed.
 83 | - [ ] **Field Metadata**: Check that all fields include descriptions and constraints where applicable.
 84 | - [ ] **Optional Fields**: Use `Optional[...]` to define optional fields.
 85 | - [ ] **Validation**: Add custom validators for domain-specific rules.
 86 | - [ ] **Reusability**: Use nested models for complex types and avoid redundancy.
 87 | - [ ] **Tests**: Provide unit tests to validate the schema against valid and invalid data.
 88 | 
 89 | ### 👩‍💻 Adding a New Schema to the Hub
 90 | 
 91 | 1. **Create a new schema file**: Create a new file in the [`schemas/contrib`](../vlmrun/hub/schemas/contrib) directory, under the appropriate industry and use case (e.g., `schemas/contrib/retail/ecommerce_product_caption.py`). Follow the [Schema Guidelines](#✏️-guidelines-for-writing-a-schema) to write the schema.
 92 | 
 93 | 2. **Add sample image, prompt and schema reference in `catalog.yaml`:** Add a sample image for the schema, a prompt that can be used with VLMs to appropriately extract the JSON, and a reference to the schema in the [`contrib/catalog.yaml`](../vlmrun/hub/contrib/catalog.yaml) file. You can also refer to the [Catalog Specification Guidelines](./catalog-spec.yaml) for more information on the catalog format.
 94 | 
 95 |    Example:
 96 |    ```yaml
 97 |    - domain: media.nfl-game-state
 98 |      schema: vlmrun.hub.schemas.contrib.media.nfl_game_state.NFLGameState
 99 |      prompt: "You are a detail-oriented NFL Game Analyst. Extract all the relevant game state information from the video feed or screenshot as accurately as possible."
100 |      description: "NFL game state extraction system that processes game footage or screenshots to extract structured information including teams, scores, game clock, possession, and other relevant game state details."
101 |      sample_data: "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png"
102 |      metadata:
103 |        supported_inputs: ["image", "video"]
104 |        tags: ["media", "sports"]
105 |    ```
106 | 
107 | 3. **Test the schema against the sample data**: Run the following test to ensure the schema is working as expected. Let's say you just added the above schema with `domain=media.nfl-game-state`.
108 | 
109 | You can run:
110 | ```bash
111 | pytest -sv tests/test_instructor.py -k test_instructor_hub_sample --domain media.nfl-game-state
112 | ```
113 | 
114 | This will download the sample data from the URL and call [Instructor](https://github.com/jxnl/instructor/) with `gpt-4o-mini` to generate a JSON output against the schema. It will then validate the JSON output against the schema and print the output to the console.
115 | 
116 | The console output will look similar to the following.
117 |    Example:
118 |    ```json
119 |    {
120 |      "description": null,
121 |      "teams": [
122 |        {
123 |          "name": "Green Bay Packers",
124 |          "score": 0
125 |        },
126 |        {
127 |          "name": "Arizona Cardinals",
128 |          "score": 7
129 |        }
130 |      ],
131 |      "status": "in_progress",
132 |      "quarter": 2,
133 |      "clock_time": "12:12",
134 |      "possession_team": "Green Bay Packers",
135 |      "down": "2nd",
136 |      "distance": 10,
137 |      "yard_line": -10,
138 |      "network": "NBC",
139 |      "is_shown": true
140 |    }
141 |    ```
142 | 
143 | > [!NOTE]
144 | > You can optionally change the provider and model to test against different models. For example, to test against `llama3.2-vision:11b` using `ollama`, you can run:
145 | > ```bash
146 | > pytest -sv tests/test_instructor.py -k test_instructor_hub_sample --domain media.nfl-game-state --provider ollama --model llama3.2-vision:11b
147 | > ```
148 | 


--------------------------------------------------------------------------------
/vlmrun/hub/schemas/document/invoice.py:
--------------------------------------------------------------------------------
  1 | from datetime import date
  2 | from typing import List, Optional
  3 | 
  4 | from pydantic import BaseModel, Field
  5 | 
  6 | 
  7 | class Address(BaseModel):  # Postal address used for the issuer and for the customer billing/shipping addresses on Invoice.
  8 |     street: Optional[str] = Field(None, description="Street address")
  9 |     city: Optional[str] = Field(None, description="City")
 10 |     state: Optional[str] = Field(None, description="State, province, or region")
 11 |     postal_code: Optional[str] = Field(None, description="Postal or ZIP code")  # kept as str, not int
 12 |     country: Optional[str] = Field(None, description="Country")
 13 | 
 14 | 
 15 | class BankDetails(BaseModel):  # Bank account details for payment by transfer; all identifiers are optional strings.
 16 |     bank_name: Optional[str] = Field(None, description="Name of the bank")
 17 |     account_holder_name: Optional[str] = Field(None, description="Name of the account holder")
 18 |     account_number: Optional[str] = Field(None, description="Bank account number")
 19 |     routing_number: Optional[str] = Field(None, description="Bank routing number (e.g., ABA for US)")
 20 |     bsb_number: Optional[str] = Field(None, description="Branch Sort Code (BSB) or similar local bank code")
 21 |     iban: Optional[str] = Field(None, description="International Bank Account Number (IBAN)")
 22 |     swift_bic: Optional[str] = Field(None, description="SWIFT/BIC code")
 23 | 
 24 | 
 25 | class Item(BaseModel):
 26 |     description: Optional[str] = Field(None, description="Description or name of the item or service")
 27 |     quantity: Optional[float] = Field(None, description="Quantity of the item, which can be a float for hours or partial units")
 28 |     unit_price: Optional[float] = Field(None, description="Unit price or rate of the item")
 29 |     
 30 |     # This is typically quantity * unit_price.
 31 |     total_price: Optional[float] = Field(None, description="Total price for this line item, typically quantity multiplied by unit price, before item-specific adjustments/taxes")
 32 | 
 33 |     currency: Optional[str] = Field(None, description="3-digit currency code for this item, if it differs from the main invoice currency")
 34 | 
 35 |     item_tax_details: Optional[str] = Field(None, description="Tax details or rate specific to this item (e.g., 'VAT 0%[1]', 'Sales Tax Exempt')")
 36 |     item_adjustment_details: Optional[str] = Field(None, description="Adjustment or discount details specific to this item (e.g., '10% off promo', 'Volume discount')")
 37 |     # If item-level tax/adjustment amounts are needed, they can be added:
 38 |     # item_tax_amount: Optional[float] = Field(None, description="Tax amount for this specific item")
 39 |     # item_adjustment_amount: Optional[float] = Field(None, description="Adjustment/discount amount for this specific item")
 40 | 
 41 | 
 42 | class Invoice(BaseModel):
 43 |     # Core Invoice Information
 44 |     invoice_id: Optional[str] = Field(None, description="Unique invoice identifier or number")
 45 |     invoice_issue_date: Optional[date] = Field(None, description="Date when the invoice was issued")
 46 |     invoice_due_date: Optional[date] = Field(None, description="Date by which the invoice payment is due")
 47 |     period_start: Optional[date] = Field(None, description="Start date of the billing period covered by the invoice, if applicable")
 48 |     period_end: Optional[date] = Field(None, description="End date of the billing period covered by the invoice, if applicable")
 49 | 
 50 |     # Related Identifiers
 51 |     order_id: Optional[str] = Field(None, description="Unique order identifier related to this invoice, if applicable")
 52 |     customer_id: Optional[str] = Field(None, description="Unique customer identifier, if applicable")
 53 | 
 54 |     # Issuer Details
 55 |     issuer_name: Optional[str] = Field(None, description="Name of the invoice issuer (company or individual)")
 56 |     issuer_address: Optional[Address] = Field(None, description="Address of the invoice issuer")
 57 |     issuer_email: Optional[str] = Field(None, description="Email address of the invoice issuer")
 58 |     issuer_phone: Optional[str] = Field(None, description="Phone number of the invoice issuer")
 59 |     issuer_vat_id: Optional[str] = Field(None, description="VAT identification number or other tax ID of the issuer")
 60 |     issuer_website: Optional[str] = Field(None, description="Website of the invoice issuer")
 61 | 
 62 |     # Customer/Recipient Details
 63 |     customer_name: Optional[str] = Field(None, description="Name of the invoice recipient (company or individual)")
 64 |     customer_email: Optional[str] = Field(None, description="Email address of the recipient")
 65 |     customer_phone: Optional[str] = Field(None, description="Phone number of the recipient")
 66 |     customer_billing_address: Optional[Address] = Field(None, description="Billing address of the recipient")
 67 |     customer_shipping_address: Optional[Address] = Field(None, description="Shipping address of the recipient, if different from billing")
 68 |     customer_vat_id: Optional[str] = Field(None, description="VAT identification number or other tax ID of the customer")
 69 | 
 70 |     # Invoice Line Items
 71 |     items: Optional[List[Item]] = Field(None, description="List of items or services detailed in the invoice")
 72 | 
 73 |     # Financial Summary
 74 |     currency: Optional[str] = Field(None, description="Primary 3-digit currency code for the invoice amounts (e.g., USD, EUR)")
 75 |     subtotal: Optional[float] = Field(None, description="Total amount of all line items before any discounts, taxes, and shipping")
 76 |     
 77 |     discount_amount: Optional[float] = Field(None, description="Total discount amount applied to the invoice subtotal")
 78 |     discount_percentage: Optional[float] = Field(None, description="Overall discount percentage applied to the invoice")
 79 |     discount_description: Optional[str] = Field(None, description="Description of the discount applied (e.g., 'Early payment discount', 'Volume discount')")
 80 | 
 81 |     shipping_cost: Optional[float] = Field(None, description="Shipping and handling charges")
 82 |     shipping_description: Optional[str] = Field(None, description="Description of shipping charges or method")
 83 | 
 84 |     tax_amount: Optional[float] = Field(None, description="Total tax amount for the invoice")
 85 |     tax_percentage: Optional[float] = Field(None, description="Overall tax rate percentage applied to the taxable amount")
 86 |     tax_description: Optional[str] = Field(None, description="Description of the tax applied, such as type, rate, or jurisdiction")
 87 |     overall_tax_notes: Optional[str] = Field(None, description="Additional notes regarding taxes for the entire invoice (e.g., 'Tax to be paid on reverse charge basis')")
 88 | 
 89 |     total_amount: Optional[float] = Field(None, description="The final total amount of the invoice after all deductions and additions (subtotal - discounts + shipping + taxes)")
 90 |     amount_paid: Optional[float] = Field(None, description="Amount already paid by the customer towards this invoice")
 91 |     balance_due: Optional[float] = Field(None, description="Remaining amount due for payment (total_amount - amount_paid)")
 92 | 
 93 |     # Payment Information
 94 |     payment_terms: Optional[str] = Field(None, description="Payment terms and conditions (e.g., 'Net 30 days', 'Due upon receipt')")
 95 |     payment_instructions: Optional[str] = Field(None, description="Specific instructions for making payment")
 96 |     payment_link: Optional[str] = Field(None, description="A URL for online payment, if available")
 97 |     bank_details: Optional[BankDetails] = Field(None, description="Bank account details for payment transfers")
 98 | 
 99 |     # Miscellaneous
100 |     notes: Optional[str] = Field(None, description="General notes, comments, or miscellaneous information on the invoice")
101 |     terms_and_conditions: Optional[str] = Field(None, description="General terms and conditions related to the invoice or service")
102 |     footer_text: Optional[str] = Field(None, description="Text appearing in the footer of the invoice (e.g., thank you message, company slogan)")
103 |     page_information: Optional[str] = Field(None, description="Page numbering or other page-specific information (e.g., 'Page 1 of 1')")
104 |     logo_url: Optional[str] = Field(None, description="URL of the company logo displayed on the invoice")
105 | 


--------------------------------------------------------------------------------
/tests/benchmarks/2025-01-10-gemini-2.0-flash-exp-instructor-results.md:
--------------------------------------------------------------------------------
 1 | ## Benchmark Results (model=gemini-2.0-flash-exp, date=2025-01-10)
 2 | 
 3 | <table>
 4 | <tr>
 5 | <td style='width: 5%;'> Domain </td>
 6 | <td style='width: 5%;'> Response Model </td>
 7 | <td style='width: 40%;'> Sample </td>
 8 | <td style='width: 50%;'> Response JSON </td>
 9 | </tr>
10 |     <tr><td> <kbd>document.invoice</kbd> </td>
11 | <td> <kbd>Invoice</kbd> </td>
12 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg' width='100%' /> </td>
13 | <td> <pre>❌</pre> </td>
14 | </tr><tr><td> <kbd>document.receipt</kbd> </td>
15 | <td> <kbd>Receipt</kbd> </td>
16 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.receipt/sample_receipt.webp' width='100%' /> </td>
17 | <td> <pre>❌</pre> </td>
18 | </tr><tr><td> <kbd>document.resume</kbd> </td>
19 | <td> <kbd>Resume</kbd> </td>
20 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.resume/fake-resume.webp' width='100%' /> </td>
21 | <td> <pre>❌</pre> </td>
22 | </tr><tr><td> <kbd>document.us-drivers-license</kbd> </td>
23 | <td> <kbd>USDriversLicense</kbd> </td>
24 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.us-drivers-license/dl3.jpg' width='100%' /> </td>
25 | <td> <pre>{<br>  "issuing_state": "MT",<br>  "license_number": "0812319684104",<br>  "full_name": "BRENDA LYNN",<br>  "first_name": "BRENDA",<br>  "middle_name": null,<br>  "last_name": "LYNN",<br>  "address": {<br>    "street": "123 MAIN STREET",<br>    "city": "HELENA",<br>    "state": "MT",<br>    "zip_code": "59601"<br>  },<br>  "date_of_birth": "1968-08-04",<br>  "gender": "F",<br>  "height": "5'-06\"",<br>  "weight": 150.0,<br>  "eye_color": "BRO",<br>  "hair_color": null,<br>  "issue_date": "2015-02-15",<br>  "expiration_date": "2023-08-04",<br>  "license_class": "D",<br>  "donor": null,<br>  "veteran": null<br>}</pre> </td>
26 | </tr><tr><td> <kbd>document.utility-bill</kbd> </td>
27 | <td> <kbd>UtilityBill</kbd> </td>
28 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.utility-bill/utility-bill-example.webp' width='100%' /> </td>
29 | <td> <pre>{<br>  "account_number": "1234567890-1",<br>  "date_mailed": "2019-09-07",<br>  "service_for": "SPARKY JOULE",<br>  "service_address": {<br>    "street": "12345 ENERGY CT",<br>    "city": null,<br>    "state": null,<br>    "zip_code": null<br>  },<br>  "billing_period_start": null,<br>  "billing_period_end": null,<br>  "date_due": "2019-09-28",<br>  "amount_due": 88.14,<br>  "previous_balance": 0.0,<br>  "payment_received": -91.57,<br>  "current_charges": 88.14,<br>  "breakdown_of_charges": [<br>    {<br>      "description": "Current PG&E Electric Delivery Charges",<br>      "amount": 55.66,<br>      "usage": null,<br>      "rate": null<br>    },<br>    {<br>      "description": "Silicon Valley Clean Energy Electric Generation Charges",<br>      "amount": 32.48,<br>      "usage": null,<br>      "rate": null<br>    }<br>  ],<br>  "payment_options": [<br>    "www.pge.com/waystopay"<br>  ],<br>  "contact_information": {<br>    "phone": "1-800-743-5000",<br>    "website": "www.pge.com/MyEnergy"<br>  }<br>}</pre> </td>
30 | </tr><tr><td> <kbd>document.w2-form</kbd> </td>
31 | <td> <kbd>W2Form</kbd> </td>
32 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.w2-form/w2-form.jpg' width='100%' /> </td>
33 | <td> <pre>{<br>  "control_number": "GN1851",<br>  "ein": "63-0065650",<br>  "ssn": "554-03-0876",<br>  "employee_name": "Anastasia Hodges",<br>  "employee_address": {<br>    "street": "200 2nd Street NE",<br>    "city": "Waseca",<br>    "state": "MN",<br>    "zip_code": "56093"<br>  },<br>  "employer_name": "NORTH 312",<br>  "employer_address": {<br>    "street": "151 N Market Street",<br>    "city": "Wooster",<br>    "state": "OH",<br>    "zip_code": "44691"<br>  },<br>  "wages_tips_other_compensation": 23677.7,<br>  "federal_income_tax_withheld": 2841.32,<br>  "social_security_wages": 24410.0,<br>  "social_security_tax_withheld": 1513.42,<br>  "medicare_wages_and_tips": 24410.0,<br>  "medicare_tax_withheld": 353.95,<br>  "tax_year": 2020<br>}</pre> </td>
34 | </tr><tr><td> <kbd>aerospace.remote-sensing</kbd> </td>
35 | <td> <kbd>RemoteSensing</kbd> </td>
36 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/aerospace.remote-sensing/planet_labs_port.jpg' width='100%' /> </td>
37 | <td> <pre>{<br>  "description": "This satellite image shows a large port area with numerous docks, ships, and storage facilities. Adjacent to the port is a dense urban area with a grid-like street pattern. The coastline is visible with a beach area and a breakwater extending into the ocean. The image is clear with no cloud cover.",<br>  "objects": [<br>    "ships",<br>    "docks",<br>    "storage facilities",<br>    "urban area",<br>    "beach",<br>    "breakwater",<br>    "roads",<br>    "railway",<br>    "parking lots",<br>    "marina"<br>  ],<br>  "categories": [<br>    "port",<br>    "residential-area",<br>    "beach",<br>    "commercial-area",<br>    "industrial-area",<br>    "parking-lot",<br>    "other"<br>  ],<br>  "is_visible": true<br>}</pre> </td>
38 | </tr><tr><td> <kbd>healthcare.medical-insurance-card</kbd> </td>
39 | <td> <kbd>MedicalInsuranceCard</kbd> </td>
40 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/healthcare.medical-insurance-card/blue_cross_example.jpg' width='100%' /> </td>
41 | <td> <pre>{<br>  "provider_service": null,<br>  "member_information": {<br>    "member_name": "Member Name",<br>    "member_id": "XY2123456789",<br>    "group_number": "023457"<br>  },<br>  "pharmacy_plan": {<br>    "rx_bin": "987654",<br>    "rx_pcn": null,<br>    "rx_grp": null,<br>    "pharmacy_help_desk": null<br>  },<br>  "insurance_provider": {<br>    "provider_name": "BlueCross BlueShield",<br>    "network": "PPO"<br>  },<br>  "coverage": {<br>    "office_visit": "$15",<br>    "specialist_visit": "$15",<br>    "urgent_care": null,<br>    "emergency_room": "$75",<br>    "inpatient_hospital": null<br>  }<br>}</pre> </td>
42 | </tr><tr><td> <kbd>retail.ecommerce-product-caption</kbd> </td>
43 | <td> <kbd>RetailEcommerceProductCaption</kbd> </td>
44 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/retail.ecommerce-product-caption/Electronics%20-%20Kindle.webp' width='100%' /> </td>
45 | <td> <pre>{<br>  "description": "The product is a black Kindle Paperwhite with a 6.8\" display and adjustable warm light. It is shown being held in a person's hand with a book displayed on the screen.",<br>  "rating": 95,<br>  "name": "Kindle Paperwhite (8 GB)",<br>  "brand": "Amazon",<br>  "category": "Amazon Devices / Kindle",<br>  "price": "$139.99",<br>  "color": "Black"<br>}</pre> </td>
46 | </tr><tr><td> <kbd>media.tv-news</kbd> </td>
47 | <td> <kbd>TVNews</kbd> </td>
48 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.tv-news/bbc_news_ukraine_screenshot.jpg' width='100%' /> </td>
49 | <td> <pre>{<br>  "description": "A female news anchor is reporting from a studio. The background is blurred and shows multiple screens. The chyron at the bottom of the screen displays the main headline and a secondary news item.",<br>  "chyron": "Biden criticises Netanyahu in an interview\n• Alan Bates: Post Office would do 'anything to hide Horizon failures'",<br>  "network": "BBC NEWS",<br>  "reporters": null<br>}</pre> </td>
50 | </tr><tr><td> <kbd>media.nfl-game-state</kbd> </td>
51 | <td> <kbd>NFLGameState</kbd> </td>
52 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png' width='100%' /> </td>
53 | <td> <pre>{<br>  "description": null,<br>  "teams": [<br>    {<br>      "name": "GB",<br>      "score": 0<br>    },<br>    {<br>      "name": "AZ",<br>      "score": 7<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 2,<br>  "clock_time": "12:12",<br>  "possession_team": null,<br>  "down": "2nd",<br>  "distance": 10,<br>  "yard_line": null,<br>  "network": "NBC",<br>  "is_shown": true<br>}</pre> </td>
54 | </tr><tr><td> <kbd>media.nba-game-state</kbd> </td>
55 | <td> <kbd>NBAGameState</kbd> </td>
56 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nba-game-state/heats_spurs.png' width='100%' /> </td>
57 | <td> <pre>{<br>  "description": "The Miami Heat are playing against the San Antonio Spurs in Game 1 of the NBA Finals.",<br>  "teams": [<br>    {<br>      "name": "MIA",<br>      "score": 7<br>    },<br>    {<br>      "name": "SA",<br>      "score": 6<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 1,<br>  "clock_time": "9:09",<br>  "shot_clock": 11,<br>  "network": "ESPN",<br>  "is_shown": true<br>}</pre> </td>
58 | </tr>
59 | </table>
60 | 


--------------------------------------------------------------------------------
/vlmrun/hub/registry.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import importlib
  3 | import json
  4 | from functools import cached_property
  5 | from pathlib import Path
  6 | from typing import Dict, List, Literal, Optional, Tuple, Type, Union
  7 | 
  8 | from loguru import logger
  9 | from pydantic import BaseModel, Field, model_validator
 10 | from pydantic_yaml import parse_yaml_raw_as
 11 | 
 12 | 
 13 | class Registry:
 14 |     """A singleton registry for schemas.
 15 | 
 16 |     Examples:
 17 |         >>> from vlmrun.hub.registry import registry
 18 |         >>> schema = registry["document.invoice"]
 19 |         >>> registry.list_schemas()
 20 |         ['document.invoice', 'document.receipt', ...]
 21 |     """
 22 | 
 23 |     _instance = None
 24 | 
 25 |     def __new__(cls):
 26 |         if cls._instance is None:
 27 |             cls._instance = super().__new__(cls)
 28 |             cls._instance._schemas = {}
 29 |             cls._instance._initialized = False
 30 |             cls._instance._schema_metadata = {}  # Store metadata when registering schemas
 31 |         return cls._instance
 32 | 
 33 |     @property
 34 |     def schemas(self) -> Dict[str, Type[BaseModel]]:
 35 |         """Lazily load schemas when first accessed."""
 36 |         if not self._initialized:
 37 |             self.load_schemas()
 38 |         return self._schemas
 39 | 
 40 |     def _extract_metadata(self, schema) -> dict:
 41 |         return {
 42 |             "description": schema.description,
 43 |             "supported_inputs": schema.metadata.supported_inputs if schema.metadata else None,
 44 |             "tags": schema.metadata.tags if schema.metadata else None,
 45 |             "sample_data": ([schema.sample_data] if isinstance(schema.sample_data, str) else schema.sample_data),
 46 |         }
 47 | 
 48 |     def _load_catalog(self, path: Path) -> None:
 49 |         catalog = SchemaCatalogYaml.from_yaml(path)
 50 |         for schema in catalog.schemas:
 51 |             metadata = self._extract_metadata(schema)
 52 |             self.register(schema.domain, schema.schema_class, metadata)
 53 |         logger.debug(f"Loaded schemas from {path}")
 54 | 
 55 |     def register(self, name: str, schema: Type[BaseModel], metadata: Optional[dict] = None) -> None:
 56 |         """Register a schema with the registry."""
 57 |         if not issubclass(schema, BaseModel):
 58 |             raise ValueError(f"Schema {name} is not a subclass of BaseModel, type={type(schema)}")
 59 |         self._schemas[name] = schema
 60 |         if metadata:
 61 |             self._schema_metadata[name] = metadata
 62 | 
 63 |     def load_schemas(self, catalog_paths: Optional[Tuple[Union[str, Path]]] = None) -> None:
 64 |         from vlmrun.hub.constants import VLMRUN_HUB_CATALOG_PATH, VLMRUN_HUB_PATH
 65 | 
 66 |         if not self._initialized:
 67 |             try:
 68 |                 # Load default catalog
 69 |                 self._load_catalog(VLMRUN_HUB_CATALOG_PATH)
 70 | 
 71 |                 # Load contrib catalog if exists
 72 |                 contrib_path = VLMRUN_HUB_PATH / "schemas/contrib/catalog.yaml"
 73 |                 if contrib_path.exists():
 74 |                     try:
 75 |                         self._load_catalog(contrib_path)
 76 |                     except Exception as e:
 77 |                         logger.error(f"Failed to load contrib schemas: {e}")
 78 | 
 79 |                 self._initialized = True
 80 |             except Exception as e:
 81 |                 logger.error(f"Failed to load default schemas: {e}")
 82 |                 raise
 83 | 
 84 |         # Load additional catalogs if provided
 85 |         if catalog_paths is not None:
 86 |             for path in catalog_paths:
 87 |                 path = Path(path)
 88 |                 if not path.exists():
 89 |                     raise FileNotFoundError(f"Catalog file not found: {path}")
 90 |                 self._load_catalog(path)
 91 | 
 92 |     def get_domain_info(self, domain: str) -> dict:
 93 |         """Get metadata for a domain."""
 94 |         return self._schema_metadata.get(domain, {})
 95 | 
 96 |     def list_schemas(self) -> List[str]:
 97 |         return sorted(self.schemas.keys())
 98 | 
 99 |     def __contains__(self, name: str) -> bool:
100 |         return name in self.schemas
101 | 
102 |     def __getitem__(self, name: str) -> Type[BaseModel]:
103 |         try:
104 |             return self.schemas[name]
105 |         except KeyError:
106 |             raise KeyError(f"Schema '{name}' not found. Available schemas: {', '.join(self.list_schemas())}")
107 | 
108 |     def __repr__(self) -> str:
109 |         repr_str = f"Registry [schemas={len(self.schemas)}]"
110 |         for name, schema in sorted(self.schemas.items()):
111 |             repr_str += f"\n  {name} :: {schema.__name__}"
112 |         return repr_str
113 | 
114 | 
# Module-level shared instance; Registry.__new__ returns this same object on every later call.
registry = Registry()
116 | 
117 | 
class SchemaCatalogMetadata(BaseModel):
    """Represents the metadata for a schema in the catalog."""

    # Input kinds the schema accepts; None when the catalog entry declares none.
    supported_inputs: Optional[List[Literal["image", "audio", "video", "document"]]] = Field(
        default=None,
        description="List of supported input types",
    )
    # Free-form labels attached to the schema; None when absent.
    tags: Optional[List[str]] = Field(
        default=None,
        description="List of tags",
    )
125 | 
126 | 
class SchemaCatalogItem(BaseModel):
    """Represents a single schema entry in the catalog."""

    # NOTE: the class docstring above is left verbatim on purpose, since
    # pydantic includes it in the generated JSON schema.
    domain: str = Field(..., description="Domain identifier for the schema")
    # Populated from the ``schema`` key of the catalog entry (see alias).
    schema_path: str = Field(..., alias="schema", description="Fully qualified path to the schema class")
    prompt: str = Field(..., description="Task-specific prompt for the schema")
    description: Optional[str] = Field(None, description="Detailed description of the schema's purpose")
    sample_data: Optional[Union[str, List[str]]] = Field(None, description="URL to sample data for testing")
    version: Optional[str] = Field(None, description="Optional schema version in semver format")
    metadata: Optional[SchemaCatalogMetadata] = Field(None, description="Additional metadata including tags")

    @model_validator(mode="after")
    def validate_supported_inputs(self):
        """Reject entries whose ``supported_inputs`` holds an unknown input kind."""
        if self.metadata and self.metadata.supported_inputs:
            allowed = ("image", "audio", "video", "document")
            # Raise instead of assert: assertions are removed under ``python -O``,
            # which would silently switch this validation off.
            if any(kind not in allowed for kind in self.metadata.supported_inputs):
                raise ValueError("Supported inputs must be valid")
        return self

    @model_validator(mode="after")
    def validate_domain(self):
        """Require ``domain`` to look like ``category.name`` with both parts non-empty."""
        # Split on the first dot only; the name part may itself contain dots.
        category, dot, name = self.domain.partition(".")
        if not dot:
            raise ValueError("Domain must be in format: category.name")
        if not (category and name):
            raise ValueError("Both category and name must be non-empty")
        return self

    @property
    def module_name(self) -> str:
        """Everything in ``schema_path`` before its last dot."""
        return self.schema_path.rsplit(".", 1)[0]

    @property
    def class_name(self) -> str:
        """The part of ``schema_path`` after its last dot."""
        return self.schema_path.rsplit(".", 1)[1]

    @cached_property
    def schema_class(self) -> type[BaseModel]:
        """Import and return the object named by ``schema_path`` (cached per instance).

        Raises:
            ValueError: If the module cannot be imported, the attribute is
                missing, or ``schema_path`` has no ``module.Class`` separator.
        """
        try:
            module = importlib.import_module(self.module_name)
            schema_class = getattr(module, self.class_name)
        except (ImportError, AttributeError, IndexError) as e:
            # IndexError: ``class_name`` found no dot in ``schema_path``.
            raise ValueError(f"Unable to import {self.schema_path}: {e}") from e
        return schema_class

    @cached_property
    def schema_hash(self) -> str:
        """Compute a hash of the schema JSON.

        Returns:
            The first 8 hex characters of the SHA-256 digest of the
            JSON-serialised ``model_json_schema()`` output.
        """
        schema_json: dict = self.schema_class.model_json_schema()
        # Keys are serialised in dict order (no ``sort_keys``); kept as-is so
        # existing hash values stay stable.
        schema_hash: str = hashlib.sha256(json.dumps(schema_json).encode()).hexdigest()[:8]
        return schema_hash
176 | 
177 | 
class SchemaCatalogYaml(BaseModel):
    """Root model for the catalog.yaml file."""

    # NOTE: the class docstring above is left verbatim on purpose, since
    # pydantic includes it in the generated JSON schema.
    apiVersion: str = Field(..., description="API version of the catalog format")
    catalogs: Union[List[str], None] = Field(None, description="List of catalog files to include as references")
    schemas: List[SchemaCatalogItem] = Field(default_factory=list, description="List of schema entries")

    @classmethod
    def from_yaml(cls, yaml_path: Path) -> "SchemaCatalogYaml":
        """Parse a catalog file and merge in the sub-catalogs it references.

        Args:
            yaml_path: Location of the catalog file; a plain string is
                accepted as well.

        Returns:
            The parsed catalog with the schemas of all referenced
            sub-catalogs appended to ``schemas``.

        Raises:
            FileNotFoundError: If ``yaml_path`` or a referenced sub-catalog
                does not exist.
        """
        yaml_path = Path(yaml_path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"Catalog file not found: {yaml_path}")
        # Read as UTF-8 explicitly so parsing does not depend on the platform's
        # default text encoding.
        catalog: SchemaCatalogYaml = parse_yaml_raw_as(cls, yaml_path.read_text(encoding="utf-8"))
        catalog = catalog.load_catalogs(yaml_path.parent)
        return catalog

    def load_catalogs(self, subdirectory: Union[str, Path]) -> "SchemaCatalogYaml":
        """Unroll the catalog references into a single list of schemas.

        Each entry of ``catalogs`` is resolved relative to ``subdirectory``,
        parsed with ``from_yaml`` and its schemas are appended to
        ``self.schemas`` in place.

        Args:
            subdirectory: Directory against which the references are resolved.

        Returns:
            ``self``, to allow chaining.

        Raises:
            FileNotFoundError: If a referenced catalog file does not exist.
        """
        if self.catalogs:
            base_dir = Path(subdirectory)
            for catalog in self.catalogs:
                logger.debug(f"Loading sub-catalog [catalog={catalog}, dir={subdirectory}]")
                catalog_path = base_dir / catalog
                # Raise instead of assert so the check survives ``python -O``.
                if not catalog_path.exists():
                    raise FileNotFoundError(f"Catalog {catalog} not found in schemas")
                catalog_yaml = SchemaCatalogYaml.from_yaml(catalog_path)
                n_schemas = len(catalog_yaml.schemas)
                self.schemas.extend(catalog_yaml.schemas)
                logger.debug(f"Loaded sub-catalog [catalog={catalog}, n_schemas={n_schemas}]")
            logger.debug(f"Loaded full catalog [n_catalogs={len(self.catalogs)}, n_schemas={len(self.schemas)}]")
        return self
206 | 


--------------------------------------------------------------------------------
/tests/benchmarks/2025-01-10-gpt-4o-mini-2024-07-18-instructor-results.md:
--------------------------------------------------------------------------------
 1 | ## Benchmark Results (model=gpt-4o-mini-2024-07-18, date=2025-01-10)
 2 | 
 3 | <table>
 4 | <tr>
 5 | <td style='width: 5%;'> Domain </td>
 6 | <td style='width: 5%;'> Response Model </td>
 7 | <td style='width: 40%;'> Sample </td>
 8 | <td style='width: 50%;'> Response JSON </td>
 9 | </tr>
10 |     <tr><td> <kbd>document.invoice</kbd> </td>
11 | <td> <kbd>Invoice</kbd> </td>
12 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg' width='100%' /> </td>
13 | <td> <pre>❌</pre> </td>
14 | </tr><tr><td> <kbd>document.receipt</kbd> </td>
15 | <td> <kbd>Receipt</kbd> </td>
16 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.receipt/sample_receipt.webp' width='100%' /> </td>
17 | <td> <pre>{<br>  "receipt_id": null,<br>  "transaction_date": "2021-01-26T22:36:22",<br>  "merchant_name": "Walmart",<br>  "merchant_address": {<br>    "street": "8060 W TROPICAL PKWY",<br>    "city": "LAS VEGAS",<br>    "state": "NV",<br>    "postal_code": "89149",<br>    "country": null<br>  },<br>  "merchant_phone": null,<br>  "cashier_name": "SARAH",<br>  "register_number": "35",<br>  "customer_name": null,<br>  "customer_id": null,<br>  "items": [<br>    {<br>      "description": "BOYS CREW",<br>      "quantity": 1.0,<br>      "unit_price": 9.48,<br>      "total_price": 9.48<br>    },<br>    {<br>      "description": "BOYS SOCKS",<br>      "quantity": 1.0,<br>      "unit_price": 6.97,<br>      "total_price": 6.97<br>    },<br>    {<br>      "description": "BOXER BRIEF",<br>      "quantity": 1.0,<br>      "unit_price": 10.98,<br>      "total_price": 10.98<br>    }<br>  ],<br>  "subtotal": 27.43,<br>  "tax": 2.3,<br>  "total": 29.73,<br>  "currency": "USD",<br>  "payment_method": {<br>    "type": "Debit",<br>    "card_last_4": "****",<br>    "card_type": null<br>  },<br>  "discount_amount": null,<br>  "discount_description": null,<br>  "tip_amount": null,<br>  "return_policy": null,<br>  "barcode": null,<br>  "additional_charges": null,<br>  "notes": null,<br>  "others": null<br>}</pre> </td>
18 | </tr><tr><td> <kbd>document.resume</kbd> </td>
19 | <td> <kbd>Resume</kbd> </td>
20 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.resume/fake-resume.webp' width='100%' /> </td>
21 | <td> <pre>❌</pre> </td>
22 | </tr><tr><td> <kbd>document.us-drivers-license</kbd> </td>
23 | <td> <kbd>USDriversLicense</kbd> </td>
24 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.us-drivers-license/dl3.jpg' width='100%' /> </td>
25 | <td> <pre>{<br>  "issuing_state": "MT",<br>  "license_number": "0812319684104",<br>  "full_name": "Brenda Lynn Sample",<br>  "first_name": "Brenda",<br>  "middle_name": "Lynn",<br>  "last_name": "Sample",<br>  "address": {<br>    "street": "123 MAIN STREET",<br>    "city": "HELENA",<br>    "state": "MT",<br>    "zip_code": "59601"<br>  },<br>  "date_of_birth": "1968-08-04",<br>  "gender": "F",<br>  "height": "5'06\"",<br>  "weight": 150.0,<br>  "eye_color": "BRO",<br>  "hair_color": null,<br>  "issue_date": "2015-02-15",<br>  "expiration_date": "2023-08-04",<br>  "license_class": "D",<br>  "donor": null,<br>  "veteran": null<br>}</pre> </td>
26 | </tr><tr><td> <kbd>document.utility-bill</kbd> </td>
27 | <td> <kbd>UtilityBill</kbd> </td>
28 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.utility-bill/utility-bill-example.webp' width='100%' /> </td>
29 | <td> <pre>{<br>  "account_number": "1234567890-1",<br>  "date_mailed": "2019-09-07",<br>  "service_for": "SPARKY JOULE",<br>  "service_address": {<br>    "street": "12345 ENERGY CT",<br>    "city": null,<br>    "state": null,<br>    "zip_code": null<br>  },<br>  "billing_period_start": null,<br>  "billing_period_end": null,<br>  "date_due": "2019-09-28",<br>  "amount_due": 88.14,<br>  "previous_balance": 0.0,<br>  "payment_received": 91.57,<br>  "current_charges": 88.14,<br>  "breakdown_of_charges": [<br>    {<br>      "description": "Current PG&E Electric Delivery Charges",<br>      "amount": 55.66,<br>      "usage": null,<br>      "rate": null<br>    },<br>    {<br>      "description": "Silicon Valley Clean Energy Electric Generation Charges",<br>      "amount": 32.48,<br>      "usage": null,<br>      "rate": null<br>    }<br>  ],<br>  "payment_options": [<br>    "www.pge.com/waystopay"<br>  ],<br>  "contact_information": {<br>    "phone": "1-800-743-5000",<br>    "website": "www.pge.com/MyEnergy"<br>  }<br>}</pre> </td>
30 | </tr><tr><td> <kbd>document.w2-form</kbd> </td>
31 | <td> <kbd>W2Form</kbd> </td>
32 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.w2-form/w2-form.jpg' width='100%' /> </td>
33 | <td> <pre>{<br>  "control_number": "GNI851",<br>  "ein": "63-0065650",<br>  "ssn": "554-03-0876",<br>  "employee_name": "Anastasia Hodges",<br>  "employee_address": {<br>    "street": "200 2nd Street NE",<br>    "city": "Waseca",<br>    "state": "MN",<br>    "zip_code": "56093"<br>  },<br>  "employer_name": "NORTH 312",<br>  "employer_address": {<br>    "street": "151 N Market Street",<br>    "city": "Wooster",<br>    "state": "OH",<br>    "zip_code": "44691"<br>  },<br>  "wages_tips_other_compensation": 23677.7,<br>  "federal_income_tax_withheld": 2841.32,<br>  "social_security_wages": 24410.0,<br>  "social_security_tax_withheld": 1513.42,<br>  "medicare_wages_and_tips": 24410.0,<br>  "medicare_tax_withheld": 353.95,<br>  "tax_year": 2020<br>}</pre> </td>
34 | </tr><tr><td> <kbd>aerospace.remote-sensing</kbd> </td>
35 | <td> <kbd>RemoteSensing</kbd> </td>
36 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/aerospace.remote-sensing/planet_labs_port.jpg' width='100%' /> </td>
37 | <td> <pre>{<br>  "description": "The satellite image captures a coastal urban area adjacent to a busy port. The scene includes residential neighborhoods, a marina, and extensive shipping facilities with numerous cargo containers visible. The waterway is active with vessels, indicating significant maritime activity.",<br>  "objects": [<br>    "residential buildings",<br>    "marina",<br>    "cargo containers",<br>    "shipping docks",<br>    "waterway",<br>    "roads",<br>    "beach"<br>  ],<br>  "categories": [<br>    "commercial-area",<br>    "port",<br>    "residential-area",<br>    "water-treatment",<br>    "beach"<br>  ],<br>  "is_visible": true<br>}</pre> </td>
38 | </tr><tr><td> <kbd>healthcare.medical-insurance-card</kbd> </td>
39 | <td> <kbd>MedicalInsuranceCard</kbd> </td>
40 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/healthcare.medical-insurance-card/blue_cross_example.jpg' width='100%' /> </td>
41 | <td> <pre>{<br>  "provider_service": {<br>    "provider_service_number": null,<br>    "precertification_number": null<br>  },<br>  "member_information": {<br>    "member_name": "Member Name",<br>    "member_id": "XY2 123456789",<br>    "group_number": "023457"<br>  },<br>  "pharmacy_plan": {<br>    "rx_bin": "987654",<br>    "rx_pcn": null,<br>    "rx_grp": "HIOPT",<br>    "pharmacy_help_desk": null<br>  },<br>  "insurance_provider": {<br>    "provider_name": "BlueCross BlueShield",<br>    "network": "PPO"<br>  },<br>  "coverage": {<br>    "office_visit": "$15",<br>    "specialist_visit": null,<br>    "urgent_care": null,<br>    "emergency_room": "$75",<br>    "inpatient_hospital": null<br>  }<br>}</pre> </td>
42 | </tr><tr><td> <kbd>retail.ecommerce-product-caption</kbd> </td>
43 | <td> <kbd>RetailEcommerceProductCaption</kbd> </td>
44 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/retail.ecommerce-product-caption/Electronics%20-%20Kindle.webp' width='100%' /> </td>
45 | <td> <pre>{<br>  "description": "The Kindle Paperwhite features a 6.8\" display and adjustable warm light for a comfortable reading experience. It is designed for easy portability and offers a sleek black finish.",<br>  "rating": 85,<br>  "name": "Kindle Paperwhite",<br>  "brand": "Amazon",<br>  "category": "Electronics / E-readers",<br>  "price": "$139.99",<br>  "color": "Black"<br>}</pre> </td>
46 | </tr><tr><td> <kbd>media.tv-news</kbd> </td>
47 | <td> <kbd>TVNews</kbd> </td>
48 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.tv-news/bbc_news_ukraine_screenshot.jpg' width='100%' /> </td>
49 | <td> <pre>{<br>  "description": "A news anchor presenting a segment about President Biden criticizing Netanyahu in an interview.",<br>  "chyron": "Biden criticises Netanyahu in an interview",<br>  "network": "BBC News",<br>  "reporters": null<br>}</pre> </td>
50 | </tr><tr><td> <kbd>media.nfl-game-state</kbd> </td>
51 | <td> <kbd>NFLGameState</kbd> </td>
52 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nfl-game-state/packers_cardinals_screenshot.png' width='100%' /> </td>
53 | <td> <pre>{<br>  "description": null,<br>  "teams": [<br>    {<br>      "name": "GB",<br>      "score": 0<br>    },<br>    {<br>      "name": "AZ",<br>      "score": 7<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 2,<br>  "clock_time": "12:12",<br>  "possession_team": "GB",<br>  "down": "2nd",<br>  "distance": 10,<br>  "yard_line": -10,<br>  "network": "NBC",<br>  "is_shown": true<br>}</pre> </td>
54 | </tr><tr><td> <kbd>media.nba-game-state</kbd> </td>
55 | <td> <kbd>NBAGameState</kbd> </td>
56 | <td> <img src='https://storage.googleapis.com/vlm-data-public-prod/hub/examples/media.nba-game-state/heats_spurs.png' width='100%' /> </td>
57 | <td> <pre>{<br>  "description": null,<br>  "teams": [<br>    {<br>      "name": "MIA",<br>      "score": 7<br>    },<br>    {<br>      "name": "SA",<br>      "score": 6<br>    }<br>  ],<br>  "status": "in_progress",<br>  "quarter": 1,<br>  "clock_time": "9:09",<br>  "shot_clock": 11,<br>  "network": "ESPN",<br>  "is_shown": true<br>}</pre> </td>
58 | </tr>
59 | </table>
60 | 


--------------------------------------------------------------------------------