├── .flake8 ├── .github └── workflows │ ├── ci_cd.yml │ └── docs.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE ├── README.md ├── assets ├── gemini.png ├── logo_dark.svg ├── logo_light.svg └── screenshot.png ├── docs ├── _overrides │ └── main.html ├── api │ ├── agent.md │ ├── configs.md │ ├── lmm.md │ ├── models.md │ ├── sim.md │ └── tools.md └── index.md ├── examples ├── chat │ ├── .env │ ├── Makefile │ ├── README.md │ ├── app.py │ ├── chat-app │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── ResultVisualizer.tsx │ │ ├── components.json │ │ ├── next-env.d.ts │ │ ├── next.config.ts │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── postcss.config.mjs │ │ ├── public │ │ │ ├── file.svg │ │ │ ├── globe.svg │ │ │ ├── next.svg │ │ │ ├── vercel.svg │ │ │ └── window.svg │ │ ├── src │ │ │ ├── app │ │ │ │ ├── favicon.ico │ │ │ │ ├── fonts │ │ │ │ │ ├── GeistMonoVF.woff │ │ │ │ │ └── GeistVF.woff │ │ │ │ ├── globals.css │ │ │ │ ├── layout.tsx │ │ │ │ └── page.tsx │ │ │ ├── components │ │ │ │ ├── ChatSection.tsx │ │ │ │ ├── GroupedVisualizer.tsx │ │ │ │ ├── ImageVisualizer.tsx │ │ │ │ ├── PolygonDrawer.tsx │ │ │ │ ├── PreviewSection.tsx │ │ │ │ ├── VideoVisualizer.tsx │ │ │ │ ├── types.tsx │ │ │ │ ├── ui │ │ │ │ │ ├── button.tsx │ │ │ │ │ ├── card.tsx │ │ │ │ │ ├── collapsible.tsx │ │ │ │ │ ├── scroll-area.tsx │ │ │ │ │ └── tabs.tsx │ │ │ │ └── utils.tsx │ │ │ └── lib │ │ │ │ └── utils.ts │ │ ├── tailwind.config.ts │ │ └── tsconfig.json │ ├── package-lock.json │ ├── requirements.txt │ ├── run.py │ ├── run.sh │ └── setup.py ├── custom_tools │ ├── README.md │ ├── pid.png │ ├── pid_template.png │ ├── requirements.txt │ ├── run_custom_tool.py │ └── template_match.py ├── mask_app │ ├── README.md │ ├── app.py │ └── requirements.txt └── notebooks │ └── counting_cans.ipynb ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── tests ├── __init__.py ├── integ │ ├── __init__.py │ └── test_tools.py └── unit │ ├── __init__.py │ ├── fixtures.py │ ├── test_lmm.py │ ├── test_meta_tools.py │ ├── test_planner_tools.py │ ├── test_utils.py │ ├── test_vac.py │ └── tools │ ├── test_tools.py │ └── test_video.py ├── uv.lock └── vision_agent ├── .sim_tools ├── df.csv └── embs.npy ├── __init__.py ├── agent ├── README.md ├── __init__.py ├── agent.py ├── vision_agent_coder_prompts_v2.py ├── vision_agent_coder_v2.py ├── vision_agent_planner_prompts_v2.py ├── vision_agent_planner_v2.py ├── vision_agent_prompts_v2.py └── vision_agent_v2.py ├── clients ├── __init__.py └── http.py ├── configs ├── __init__.py ├── anthropic_config.py ├── config.py └── openai_config.py ├── fonts ├── __init__.py └── default_font_ch_en.ttf ├── lmm ├── __init__.py └── lmm.py ├── models ├── __init__.py ├── agent_types.py ├── lmm_types.py └── tools_types.py ├── sim ├── __init__.py └── sim.py ├── tools ├── __init__.py ├── meta_tools.py ├── planner_tools.py ├── prompts.py └── tools.py └── utils ├── __init__.py ├── agent.py ├── exceptions.py ├── execute.py ├── image_utils.py ├── tools.py ├── tools_doc.py ├── video.py └── video_tracking.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore = E501,E203 3 | max-line-length = 88 4 | max-complexity = 15 5 | per-file-ignores = __init__.py:F401 6 | -------------------------------------------------------------------------------- /.github/workflows/ci_cd.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 
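# Run the test matrix on every push to main and on every pull request that targets main;
# the release job further down only publishes from pushes to main.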
branches: [ main ] 8 | 9 | env: 10 | VISION_AGENT_API_KEY: ${{ secrets.VISION_AGENT_API_KEY }} 11 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 12 | GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} 13 | OPENAI_API_KEY: 123test 14 | PYTHONUTF8: 1 15 | 16 | jobs: 17 | unit_test: 18 | name: Test 19 | strategy: 20 | matrix: 21 | python-version: [3.9, 3.11] 22 | os: [ ubuntu-22.04, windows-2022, macos-14 ] 23 | runs-on: ${{ matrix.os }} 24 | env: 25 | RUNTIME_TAG: ci_job 26 | steps: 27 | - uses: actions/checkout@v3 28 | - uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | - name: Install uv 32 | shell: bash 33 | run: | 34 | pip install uv 35 | uv --version 36 | - name: Print Python environment information 37 | run: | 38 | uv run which python 39 | uv run python --version 40 | uv run pip --version 41 | - name: Install dependencies 42 | run: | 43 | # Install main dependencies first so we can see their size 44 | uv sync --all-extras 45 | - name: Linting 46 | run: | 47 | # stop the build if there are Python syntax errors or undefined names 48 | uv run flake8 . --exclude .venv,examples,tests --count --show-source --statistics 49 | - name: Check Format 50 | run: | 51 | uv run black --check --diff --color vision_agent/ 52 | - name: Type Checking 53 | run: | 54 | uv run mypy vision_agent 55 | - name: Test with pytest 56 | run: | 57 | uv run pytest -s -vvv tests/unit 58 | 59 | integ_test: 60 | name: Integration Test 61 | runs-on: ubuntu-22.04 62 | env: 63 | RUNTIME_TAG: ci_job 64 | steps: 65 | - uses: actions/checkout@v3 66 | - uses: actions/setup-python@v4 67 | with: 68 | python-version: 3.11 69 | - name: Install uv 70 | shell: bash 71 | run: | 72 | pip install uv 73 | uv --version 74 | - name: Print Python environment information 75 | run: | 76 | uv run which python 77 | uv run python --version 78 | uv run pip --version 79 | - name: Install dependencies 80 | run: | 81 | # Install main dependencies first so we can see their size 82 | uv sync --all-extras 83 | - name: Test with pytest 84 | run: | 85 | uv run pytest -v tests/integ 86 | 87 | release: 88 | name: Release 89 | needs: unit_test 90 | # https://github.community/t/how-do-i-specify-job-dependency-running-in-another-workflow/16482 91 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'chore(release):') && !contains(github.event.head_commit.message, '[skip release]') 92 | runs-on: ubuntu-latest 93 | steps: 94 | - uses: actions/setup-python@v4 95 | with: 96 | python-version: 3.10.11 97 | - name: Install uv 98 | shell: bash 99 | run: | 100 | pip install uv 101 | uv --version 102 | - name: Checkout code 103 | uses: actions/checkout@v3 104 | with: 105 | token: ${{ secrets.GH_TOKEN }} 106 | - name: setup git config 107 | run: | 108 | git config user.name "GitHub Actions Bot" 109 | git config user.email "yazhou.cao@landing.ai" 110 | - name: Bump up version 111 | run: | 112 | current_version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version) 113 | IFS='.' 
read -r major minor patch <<< "$current_version" 114 | patch=$((patch + 1)) 115 | new_version="${major}.${minor}.${patch}" 116 | uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "$new_version" 117 | git add pyproject.toml 118 | git commit -m "[skip ci] chore(release): ${new_version}" 119 | git push -f 120 | - name: Publish to PyPI 121 | run: | 122 | uv build 123 | uv publish --token ${{ secrets.PYPI_TOKEN }} 124 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: publish-doc 2 | 3 | # build the documentation whenever there are new commits on main 4 | on: 5 | push: 6 | branches: 7 | - main 8 | 9 | # security: restrict permissions for CI jobs. 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | # Build the documentation and upload the static HTML files as an artifact. 15 | build: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: 3.10.11 22 | - name: Install UV 23 | run: | 24 | pip install uv 25 | 26 | - name: Install dependencies 27 | run: | 28 | uv sync --all-extras 29 | uv add mkdocs mkdocs-material mkdocstrings[python] 30 | 31 | - run: mkdir -p docs-build 32 | - run: uv run mkdocs build -f mkdocs.yml -d docs-build/ 33 | 34 | - uses: actions/upload-pages-artifact@v3 35 | with: 36 | path: docs-build/ 37 | 38 | # Deploy the artifact to GitHub pages. 39 | # This is a separate job so that only actions/deploy-pages has the necessary permissions. 40 | deploy: 41 | needs: build 42 | runs-on: ubuntu-latest 43 | permissions: 44 | pages: write 45 | id-token: write 46 | environment: 47 | name: github-pages 48 | url: ${{ steps.deployment.outputs.page_url }} 49 | steps: 50 | - id: deployment 51 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Env files 11 | .env 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Libraries 18 | *.lib 19 | *.a 20 | *.la 21 | *.lo 22 | 23 | # Shared objects (inc. 
Windows DLLs) 24 | *.dll 25 | *.so 26 | *.so.* 27 | *.dylib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | *.i*86 34 | *.x86_64 35 | *.hex 36 | 37 | # Debug files 38 | *.dSYM/ 39 | *.su 40 | 41 | # Mac files 42 | .DS_Store 43 | .DS_STORE 44 | 45 | # Old HG stuff 46 | .hg 47 | .hgignore 48 | .hgtags 49 | 50 | .git 51 | __pycache__ 52 | .ipynb_checkpoints 53 | */__pycache__ 54 | */.ipynb_checkpoints 55 | .local 56 | .jupyter 57 | .ipython 58 | */.terraform 59 | terraform.* 60 | .terraform.* 61 | shinobi-dvr/* 62 | .vscode/ 63 | 64 | # mypy 65 | .mypy_cache/* 66 | 67 | # Distribution / packaging 68 | .Python 69 | build/ 70 | develop-eggs/ 71 | dist/ 72 | downloads/ 73 | eggs/ 74 | .eggs/ 75 | lib/ 76 | lib64/ 77 | parts/ 78 | sdist/ 79 | var/ 80 | wheels/ 81 | pip-wheel-metadata/ 82 | share/python-wheels/ 83 | *.egg-info/ 84 | .installed.cfg 85 | *.egg 86 | MANIFEST 87 | 88 | # Output from various tools 89 | examples/output 90 | tests/output 91 | docs-build 92 | site 93 | 94 | # Local or WIP files 95 | local/ 96 | 97 | vision-agent-benchmark/ 98 | vision_agent/tools/suggestion.py 99 | vision_agent/agent/visual_design_patterns.py 100 | */node_modules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.4.2 4 | hooks: 5 | - id: black 6 | language_version: python3.9 7 | - repo: https://github.com/pycqa/flake8 8 | rev: 7.0.0 9 | hooks: 10 | - id: flake8 11 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Laird" 5 | given-names: "Dillon" 6 | - family-names: "Jagadeesan" 7 | given-names: "Shankar" 8 | - family-names: "Cao" 9 | given-names: "Yazhou" 10 | - family-names: "Ng" 11 | given-names: "Andrew" 12 | title: "Vision Agent" 13 | version: 0.2 14 | date-released: 2024-02-12 15 | url: "https://github.com/landing-ai/vision-agent" 16 | -------------------------------------------------------------------------------- /assets/gemini.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/landing-ai/vision-agent/0539e545078a49dec422fba5656b80d6b3734197/assets/gemini.png -------------------------------------------------------------------------------- /assets/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/landing-ai/vision-agent/0539e545078a49dec422fba5656b80d6b3734197/assets/screenshot.png -------------------------------------------------------------------------------- /docs/_overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block footer %} 4 | {{ super() }} 5 | {% endblock %} -------------------------------------------------------------------------------- /docs/api/agent.md: -------------------------------------------------------------------------------- 1 | ::: vision_agent.agent.agent.Agent 2 | 3 | ::: vision_agent.agent.vision_agent_v2.VisionAgentV2 4 | 5 | ::: vision_agent.agent.vision_agent_coder_v2.VisionAgentCoderV2 6 | 7 | ::: vision_agent.agent.vision_agent_planner_v2.VisionAgentPlannerV2 -------------------------------------------------------------------------------- /docs/api/configs.md: -------------------------------------------------------------------------------- 1 | ::: vision_agent.configs.Config 2 | -------------------------------------------------------------------------------- /docs/api/lmm.md: -------------------------------------------------------------------------------- 1 | ::: vision_agent.lmm.OpenAILMM 2 | 3 | ::: vision_agent.lmm.AzureOpenAILMM 4 | 5 | ::: vision_agent.lmm.OllamaLMM 6 | 7 | ::: vision_agent.lmm.AnthropicLMM 8 | 9 | ::: vision_agent.lmm.GoogleLMM 10 | -------------------------------------------------------------------------------- /docs/api/models.md: -------------------------------------------------------------------------------- 1 | ::: vision_agent.models.AgentMessage 2 | 3 | ::: vision_agent.models.CodeContext 4 | 5 | ::: vision_agent.models.ErrorContext 6 | 7 | ::: vision_agent.models.InteractionContext 8 | 9 | ::: vision_agent.models.PlanContext 10 | 11 | ::: vision_agent.models.Message 12 | 13 | ::: vision_agent.models.TextOrImage 14 | 15 | ::: vision_agent.models.BboxInput 16 | 17 | ::: vision_agent.models.BboxInputBase64 18 | 19 | ::: vision_agent.models.BoundingBoxes 20 | 21 | ::: vision_agent.models.Florence2FtRequest 22 | 23 | ::: vision_agent.models.JobStatus 24 | 25 | ::: vision_agent.models.ODResponseData 26 | 27 | ::: vision_agent.models.PromptTask 28 | -------------------------------------------------------------------------------- /docs/api/sim.md: -------------------------------------------------------------------------------- 1 | ::: vision_agent.sim.AzureSim 2 | 3 | ::: vision_agent.sim.OllamaSim 4 | 5 | ::: vision_agent.sim.Sim 6 | 7 | ::: vision_agent.sim.StellaSim -------------------------------------------------------------------------------- /docs/api/tools.md:
-------------------------------------------------------------------------------- 1 | ::: vision_agent.tools 2 | 3 | ::: vision_agent.tools.tools 4 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --8<-- "README.md:26" 2 | -------------------------------------------------------------------------------- /examples/chat/.env: -------------------------------------------------------------------------------- 1 | PORT_BACKEND=8000 2 | PORT_FRONTEND=3000 3 | DEBUG_HIL=false -------------------------------------------------------------------------------- /examples/chat/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: setup run 2 | 3 | setup: 4 | pip install -r requirements.txt 5 | cd chat-app && make && cd .. 6 | 7 | run: 8 | ./run.sh 9 | -------------------------------------------------------------------------------- /examples/chat/README.md: -------------------------------------------------------------------------------- 1 | # VisionAgentV2 Example App 2 | 3 | This is an example application to demonstrate how to run VisionAgentV2 locally. 4 | It only works with the **V2 version** of VisionAgent and is mainly used for debugging — expect to find bugs and issues. 5 | 6 | ![screenshot](https://github.com/landing-ai/vision-agent/blob/main/assets/screenshot.png?raw=true) 7 | 8 | ## Prerequisites 9 | 10 | - Python 3.9 or higher 11 | - Node.js 18 or higher 12 | - npm (comes with Node.js) 13 | 14 | ## Quick Start 15 | 16 | ### 1. Setup 17 | 18 | #### On Windows (PowerShell) 19 | 20 | Run the setup script: 21 | 22 | ```powershell 23 | python setup.py 24 | ``` 25 | 26 | #### On Linux/macOS (with Make) 27 | 28 | ```bash 29 | make setup 30 | ``` 31 | 32 | ### 2. Run the App 33 | 34 | #### On Windows (PowerShell) 35 | 36 | ```powershell 37 | python run.py 38 | ``` 39 | 40 | #### On Linux/macOS (with Make) 41 | 42 | ```bash 43 | make run 44 | ``` 45 | 46 | This will: 47 | - Launch the FastAPI backend 48 | - Start the React frontend 49 | - Open your browser to the application 50 | - Handle proper cleanup when you press Ctrl+C 51 | 52 | ## Human-in-the-loop Mode 53 | 54 | To enable human-in-the-loop support: 55 | 56 | 1. Open `.env` 57 | 2. Set: 58 | ```bash 59 | DEBUG_HIL=true 60 | ``` 61 | 62 | **Note:** Currently, only **object detection** and **segmentation** visualizations are supported. 63 | 64 | ## Configuration 65 | 66 | ### Changing Ports 67 | 68 | To modify the frontend or backend port: 69 | 70 | 1. Open `.env` 71 | 2.
Change the `PORT_BACKEND` or `PORT_FRONTEND` variables: 72 | ```bash 73 | PORT_BACKEND = 8000 # Change to your preferred port 74 | PORT_FRONTEND = 3000 # Change to your preferred port 75 | ``` 76 | 77 | ## Troubleshooting 78 | 79 | - **Port conflicts**: The run script will attempt to free ports if they're already in use, but if not, either use another port or kill the process that is currently running on the conflicting port (make sure you know what is running on this port before killing it) 80 | - **Services not starting**: Verify you have the prerequisites installed and ran the setup 81 | - **Browser doesn't open**: Manually navigate to http://localhost:3000 (or whatever your frontend port is) 82 | - **Constant string of messages saying connection rejected/closed**: Check that you do not have multiple tabs open to http://localhost:3000 (or whatever your frontend port is) 83 | 84 | ## Support 85 | 86 | For issues and questions, please file an issue on the GitHub repository, or come ask questions in our Discord: 87 | 88 | [![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew) 89 | -------------------------------------------------------------------------------- /examples/chat/app.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import tempfile 4 | from typing import Any, Dict, List, Optional 5 | 6 | import cv2 7 | import httpx 8 | import numpy as np 9 | from fastapi import BackgroundTasks, FastAPI, WebSocket, WebSocketDisconnect 10 | from fastapi.middleware.cors import CORSMiddleware 11 | from pydantic import BaseModel 12 | 13 | from vision_agent.agent import VisionAgentV2 14 | from vision_agent.models import AgentMessage 15 | from vision_agent.lmm import AnthropicLMM 16 | from vision_agent.utils.execute import CodeInterpreterFactory 17 | 18 | from dotenv import load_dotenv 19 | import os 20 | 21 | PORT_FRONTEND = os.getenv("PORT_FRONTEND") 22 | DEBUG_HIL = os.getenv("DEBUG_HIL") 23 | 24 | app = FastAPI() 25 | 26 | # CORS config 27 | app.add_middleware( 28 | CORSMiddleware, 29 | allow_origins=[f"http://localhost:{PORT_FRONTEND}"], 30 | allow_credentials=True, 31 | allow_methods=["*"], 32 | allow_headers=["*"], 33 | ) 34 | 35 | # Single WebSocket client tracking 36 | active_client: Optional[WebSocket] = None 37 | active_client_lock = asyncio.Lock() 38 | 39 | # Add a global flag to track if processing should be canceled 40 | processing_canceled = False 41 | processing_canceled_lock = asyncio.Lock() 42 | 43 | async def _async_update_callback(message: Dict[str, Any]): 44 | global processing_canceled 45 | 46 | # Check if processing has been canceled 47 | async with processing_canceled_lock: 48 | if processing_canceled: 49 | # Skip sending updates if processing has been canceled 50 | return 51 | 52 | # Try to send message to active WebSocket client 53 | async with active_client_lock: 54 | if active_client: 55 | try: 56 | await active_client.send_json(message) 57 | except Exception: 58 | print("Client disconnected unexpectedly.") 59 | else: 60 | print("No active client to send to.") 61 | 62 | 63 | def update_callback(message: Dict[str, Any]): 64 | # Needed for non-async context 65 | loop = asyncio.new_event_loop() 66 | asyncio.set_event_loop(loop) 67 | loop.run_until_complete(_async_update_callback(message)) 68 | loop.close() 69 | 70 | 71 | # Agent setup 72 | if DEBUG_HIL: 73 | agent = VisionAgentV2( 74 | verbose=True, 75 | update_callback=update_callback, 76 | 
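# hil=True enables human-in-the-loop mode, and the interpreter created below
# uses non_exiting=True, presumably so the same kernel persists across the
# interactive feedback turns. Note: os.getenv returns a raw string, so any
# non-empty DEBUG_HIL value (including "false") is truthy and selects this
# branch.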
hil=True, 77 | ) 78 | code_interpreter = CodeInterpreterFactory.new_instance(non_exiting=True) 79 | else: 80 | agent = VisionAgentV2( 81 | agent=AnthropicLMM(model_name="claude-3-7-sonnet-20250219"), 82 | verbose=True, 83 | update_callback=update_callback, 84 | ) 85 | code_interpreter = CodeInterpreterFactory.new_instance() 86 | 87 | async def reset_cancellation_flag(): 88 | global processing_canceled 89 | async with processing_canceled_lock: 90 | processing_canceled = False 91 | 92 | 93 | def process_messages_background(messages: List[Dict[str, Any]]): 94 | global processing_canceled 95 | if processing_canceled: 96 | return 97 | 98 | for message in messages: 99 | if "media" in message and message["media"] is None: 100 | del message["media"] 101 | 102 | # Process messages normally (since cancellation is checked in the callback) 103 | 104 | response = agent.chat( 105 | [ 106 | AgentMessage( 107 | role=message["role"], 108 | content=message["content"], 109 | media=message.get("media", None), 110 | ) 111 | for message in messages 112 | ], 113 | code_interpreter=code_interpreter, 114 | ) 115 | 116 | 117 | class Message(BaseModel): 118 | role: str 119 | content: str 120 | media: Optional[List[str]] = None 121 | 122 | 123 | class Detection(BaseModel): 124 | label: str 125 | bbox: List[int] 126 | confidence: float 127 | mask: Optional[List[int]] = None 128 | 129 | 130 | def b64_video_to_frames(b64_video: str) -> List[np.ndarray]: 131 | video_bytes = base64.b64decode( 132 | b64_video.split(",")[1] if "," in b64_video else b64_video 133 | ) 134 | video_frames = [] 135 | with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as temp_video: 136 | temp_video.write(video_bytes) 137 | temp_video.flush() 138 | 139 | cap = cv2.VideoCapture(temp_video.name) 140 | while cap.isOpened(): 141 | ret, frame = cap.read() 142 | if not ret: 143 | break 144 | video_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 145 | cap.release() 146 | return video_frames 147 | 148 | 149 | @app.post("/chat") 150 | async def chat( 151 | messages: List[Message], background_tasks: BackgroundTasks 152 | ) -> Dict[str, Any]: 153 | # Reset cancellation flag before starting new processing 154 | await reset_cancellation_flag() 155 | 156 | background_tasks.add_task( 157 | process_messages_background, [m.model_dump() for m in messages] 158 | ) 159 | return {"status": "Processing started"} 160 | 161 | 162 | @app.post("/cancel") 163 | async def cancel_processing(): 164 | """Cancel any ongoing message processing.""" 165 | global processing_canceled 166 | async with processing_canceled_lock: 167 | processing_canceled = True 168 | 169 | # Also clear the active websocket if possible 170 | async with active_client_lock: 171 | if active_client: 172 | try: 173 | # Send a cancellation message that the frontend can detect 174 | await active_client.send_json({ 175 | "role": "system", 176 | "content": "Processing canceled by user." 
177 | }) 178 | except Exception: 179 | pass 180 | 181 | return {"status": "Processing canceled"} 182 | 183 | 184 | @app.websocket("/ws") 185 | async def websocket_endpoint(websocket: WebSocket): 186 | global active_client 187 | 188 | # First check if there's already a connection before accepting 189 | async with active_client_lock: 190 | if active_client: 191 | # Don't immediately accept if there's already a connection 192 | # Either reject or queue this connection 193 | await websocket.close(code=1000, reason="Only one connection allowed") 194 | return 195 | 196 | # Accept the connection only if there isn't an active client 197 | await websocket.accept() 198 | active_client = websocket 199 | 200 | try: 201 | while True: 202 | await websocket.receive_json() 203 | except WebSocketDisconnect: 204 | async with active_client_lock: 205 | if active_client == websocket: 206 | active_client = None 207 | 208 | 209 | @app.post("/send_message") 210 | async def send_message(message: Message): 211 | await _async_update_callback(message.model_dump()) -------------------------------------------------------------------------------- /examples/chat/chat-app/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .next -------------------------------------------------------------------------------- /examples/chat/chat-app/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: setup 3 | 4 | # Target to install dependencies using package-lock.json 5 | .PHONY: install-dependencies 6 | install-dependencies: 7 | npm install --legacy-peer-deps react-syntax-highlighter 8 | npm config set legacy-peer-deps true 9 | npm ci 10 | 11 | # Setup target to run all necessary commands 12 | .PHONY: setup 13 | setup: install-dependencies 14 | -------------------------------------------------------------------------------- /examples/chat/chat-app/ResultVisualizer.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { ChevronLeft, ChevronRight } from "lucide-react"; 4 | import React, { useRef, useState, useEffect } from "react"; 5 | 6 | interface DetectionItem { 7 | request: { 8 | prompts: string | string[]; 9 | confidence: number; 10 | function_name: string; 11 | }; 12 | 13 | response: { 14 | data: Array<{ 15 | label: string; 16 | score: number; 17 | bbox: [number, number, number, number]; 18 | bounding_box: [number, number, number, number]; 19 | mask: { 20 | counts: number[]; 21 | size: number[]; 22 | }; 23 | }>; 24 | }; 25 | files: Array<[string, string]>; 26 | } 27 | 28 | interface VisualizerProps { 29 | detectionResults: DetectionItem[]; 30 | onSubmit?: (functionName: string, boxThreshold: number) => void; 31 | } 32 | 33 | const VisualizerHiL: React.FC = ({ 34 | detectionResults, 35 | onSubmit, 36 | }) => { 37 | const [currentIndex, setCurrentIndex] = useState(0); 38 | const [threshold, setThreshold] = useState(0.05); 39 | const canvasRef = useRef(null); 40 | 41 | const handleNext = () => { 42 | setCurrentIndex((prev) => (prev + 1) % detectionResults.length); 43 | }; 44 | 45 | const handlePrevious = () => { 46 | setCurrentIndex( 47 | (prev) => (prev - 1 + detectionResults.length) % detectionResults.length, 48 | ); 49 | }; 50 | 51 | const handleThresholdChange = (e: React.ChangeEvent) => { 52 | setThreshold(parseFloat(e.target.value)); 53 | }; 54 | 55 | useEffect(() => { 56 | if (!detectionResults || detectionResults.length === 0) return; 57 | 
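// Redraw the canvas whenever the detection results, the selected result
// index, or the score threshold changes; these are exactly the values in
// this effect's dependency array.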
58 | const canvas = canvasRef.current; 59 | if (!canvas) return; 60 | const ctx = canvas.getContext("2d"); 61 | if (!ctx) return; 62 | 63 | const currentResult = detectionResults[currentIndex]; 64 | 65 | const image = new Image(); 66 | image.onload = () => { 67 | canvas.width = image.width; 68 | canvas.height = image.height; 69 | 70 | ctx.clearRect(0, 0, canvas.width, canvas.height); 71 | ctx.drawImage(image, 0, 0); 72 | 73 | currentResult.response.data 74 | .filter((detection) => detection.score >= threshold) 75 | .forEach((detection) => { 76 | // Florence2 compatibility 77 | if (detection.bounding_box) { 78 | detection.bbox = detection.bounding_box; 79 | } 80 | 81 | // Draw mask 82 | if (detection.mask && detection.mask.counts && detection.mask.size) { 83 | const [height, width] = detection.mask.size; 84 | const counts = detection.mask.counts; 85 | 86 | const tempCanvas = document.createElement("canvas"); 87 | tempCanvas.width = width; 88 | tempCanvas.height = height; 89 | const tmpCtx = tempCanvas.getContext("2d"); 90 | if (!tmpCtx) return; 91 | 92 | const bitmap = new Uint8Array(width * height); 93 | let pixelIndex = 0; 94 | let isOne = false; 95 | 96 | for (const count of counts) { 97 | for (let i = 0; i < count; i++) { 98 | if (pixelIndex < bitmap.length) { 99 | // Convert from row-major to column-major order 100 | const x = Math.floor(pixelIndex / height); 101 | const y = pixelIndex % height; 102 | const newIndex = y * width + x; 103 | if (newIndex < bitmap.length) { 104 | bitmap[newIndex] = isOne ? 1 : 0; 105 | } 106 | pixelIndex++; 107 | } 108 | } 109 | isOne = !isOne; 110 | } 111 | 112 | const imageData = tmpCtx.createImageData(width, height); 113 | for (let i = 0; i < bitmap.length; i++) { 114 | const offset = i * 4; 115 | if (bitmap[i] === 1) { 116 | imageData.data[offset] = 255; 117 | imageData.data[offset + 1] = 0; 118 | imageData.data[offset + 2] = 0; 119 | imageData.data[offset + 3] = 170; 120 | } 121 | } 122 | 123 | tmpCtx.putImageData(imageData, 0, 0); 124 | 125 | ctx.save(); 126 | ctx.globalCompositeOperation = "source-over"; 127 | ctx.drawImage(tempCanvas, 0, 0, width, height); 128 | ctx.restore(); 129 | } 130 | 131 | // Draw bounding box 132 | if (detection.bbox) { 133 | const [x1, y1, x2, y2] = detection.bbox; 134 | const width = x2 - x1; 135 | const height = y2 - y1; 136 | 137 | ctx.strokeStyle = "rgba(255, 0, 0, 0.6)"; 138 | ctx.lineWidth = 3; 139 | ctx.strokeRect(x1, y1, width, height); 140 | 141 | ctx.font = "16px Arial"; 142 | const labelText = `${detection.label}: ${detection.score.toFixed( 143 | 2, 144 | )}`; 145 | const textMetrics = ctx.measureText(labelText); 146 | const textHeight = 20; // Approximate height of the text 147 | const padding = 4; 148 | 149 | // Draw semi-transparent background for text 150 | ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; 151 | ctx.fillRect( 152 | x1 - padding, 153 | y1 - textHeight - padding, 154 | textMetrics.width + padding * 2, 155 | textHeight + padding * 2, 156 | ); 157 | 158 | // Draw text 159 | ctx.fillStyle = "white"; 160 | ctx.fillText(labelText, x1, y1 - 5); 161 | } 162 | }); 163 | }; 164 | 165 | image.src = `data:image/png;base64,${currentResult.files[0][1]}`; 166 | }, [detectionResults, currentIndex, threshold]); 167 | 168 | if (!detectionResults || detectionResults.length === 0) { 169 | return
<div>No results to visualize</div>; 170 | } 171 | 172 | const currentResult = detectionResults[currentIndex]; 173 | 174 | return ( 175 |
176 |
177 |

178 | Function: {currentResult.request.function_name} 179 |

180 |

181 | Prompt:{" "} 182 | {Array.isArray(currentResult.request.prompts) 183 | ? currentResult.request.prompts.join(", ") 184 | : currentResult.request.prompts} 185 |

186 | 187 |
188 | 191 | 201 |
202 |
203 | 204 |
205 | {detectionResults.length > 1 && ( 206 | 212 | )} 213 | 214 | {currentResult.files[0][0] === "video" ? ( 215 |
233 | 234 |
235 |

236 | Image {currentIndex + 1} of {detectionResults.length} 237 |

238 |
239 | 240 |
241 | 249 |
250 |
251 | ); 252 | }; 253 | 254 | export { VisualizerHiL }; 255 | -------------------------------------------------------------------------------- /examples/chat/chat-app/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "src/app/globals.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | } 20 | } -------------------------------------------------------------------------------- /examples/chat/chat-app/next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. 6 | -------------------------------------------------------------------------------- /examples/chat/chat-app/next.config.ts: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const dotenv = require('dotenv'); 3 | 4 | // Manually load the .env file from the parent directory 5 | dotenv.config({ path: path.resolve(__dirname, '..', '.env') }); 6 | 7 | module.exports = { 8 | reactStrictMode: true, 9 | env: { 10 | PORT_FRONTEND: process.env.PORT_FRONTEND, 11 | PORT_BACKEND: process.env.PORT_BACKEND, 12 | }, 13 | }; -------------------------------------------------------------------------------- /examples/chat/chat-app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chat-app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@radix-ui/react-collapsible": "^1.1.8", 13 | "@radix-ui/react-scroll-area": "^1.2.6", 14 | "@radix-ui/react-slot": "^1.2.0", 15 | "@radix-ui/react-tabs": "^1.1.9", 16 | "@tailwindcss/postcss": "^4.1.4", 17 | "class-variance-authority": "^0.7.1", 18 | "clsx": "^2.1.1", 19 | "concurrently": "^9.1.2", 20 | "dotenv": "^16.5.0", 21 | "lucide-react": "^0.503.0", 22 | "next": "^15.3.1", 23 | "prismjs": "^1.30.0", 24 | "react": "^19.1.0", 25 | "react-dom": "^19.1.0", 26 | "react-markdown": "^10.1.0", 27 | "react-syntax-highlighter": "^15.6.1", 28 | "rehype-highlight": "^7.0.2", 29 | "remark-gfm": "^4.0.1", 30 | "tailwind-merge": "^3.2.0", 31 | "tailwind-scrollbar": "^4.0.2", 32 | "tailwindcss-animate": "^1.0.7", 33 | "zeromq": "^6.4.2" 34 | }, 35 | "devDependencies": { 36 | "@types/node": "^22", 37 | "@types/prismjs": "^1.26.5", 38 | "@types/react": "^19", 39 | "@types/react-dom": "^19", 40 | "@types/react-syntax-highlighter": "^15.5.13", 41 | "eslint": "^9", 42 | "eslint-config-next": "15.3.1", 43 | "postcss": "^8.5.3", 44 | "tailwindcss": "^4.1.4", 45 | "typescript": "^5" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /examples/chat/chat-app/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | "@tailwindcss/postcss": {}, 4 | } 5 | } 
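// Tailwind CSS v4 moved its PostCSS integration into the dedicated
// @tailwindcss/postcss package, so it is the only plugin needed here.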
-------------------------------------------------------------------------------- /examples/chat/chat-app/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/chat-app/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/chat-app/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/chat-app/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/chat-app/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/landing-ai/vision-agent/0539e545078a49dec422fba5656b80d6b3734197/examples/chat/chat-app/src/app/favicon.ico -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/fonts/GeistMonoVF.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/landing-ai/vision-agent/0539e545078a49dec422fba5656b80d6b3734197/examples/chat/chat-app/src/app/fonts/GeistMonoVF.woff -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/fonts/GeistVF.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/landing-ai/vision-agent/0539e545078a49dec422fba5656b80d6b3734197/examples/chat/chat-app/src/app/fonts/GeistVF.woff -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | @tailwind base; 3 | @tailwind components; 4 | @tailwind utilities; 5 | 6 | .markdown > * { 7 | all: revert; 8 | } 9 | 10 | /* For links */ 11 | .markdown a { 12 | color: #3b82f6; /* Tailwind's blue-500 */ 13 | text-decoration: underline; 14 | } 15 | 16 | .markdown a:hover { 17 | color: #1d4ed8; /* Tailwind's blue-700 */ 18 | } 19 | 20 | body { 21 | font-family: Arial, Helvetica, sans-serif; 22 | } 23 | 24 | @layer base { 25 | :root { 26 | --background: 0 0% 100%; 27 | --foreground: 0 0% 3.9%; 28 | --card: 0 0% 100%; 29 | --card-foreground: 0 0% 3.9%; 30 | --popover: 0 0% 100%; 31 | --popover-foreground: 0 0% 3.9%; 32 | --primary: 0 0% 9%; 33 | --primary-foreground: 0 0% 98%; 34 | --secondary: 0 0% 96.1%; 35 | --secondary-foreground: 0 0% 9%; 36 | --muted: 0 0% 96.1%; 37 | --muted-foreground: 0 0% 45.1%; 38 | --accent: 0 0% 96.1%; 39 | --accent-foreground: 0 0% 9%; 40 | --destructive: 0 84.2% 60.2%; 41 | --destructive-foreground: 0 0% 98%; 42 | --border: 0 0% 89.8%; 43 | --input: 0 0% 89.8%; 44 | --ring: 0 0% 3.9%; 45 | --chart-1: 12 76% 61%; 46 | --chart-2: 173 58% 39%; 47 | --chart-3: 197 37% 24%; 48 | --chart-4: 43 74% 66%; 49 | --chart-5: 27 87% 67%; 50 | --radius: 
0.5rem; 51 | } 52 | .dark { 53 | --background: 0 0% 3.9%; 54 | --foreground: 0 0% 98%; 55 | --card: 0 0% 3.9%; 56 | --card-foreground: 0 0% 98%; 57 | --popover: 0 0% 3.9%; 58 | --popover-foreground: 0 0% 98%; 59 | --primary: 0 0% 98%; 60 | --primary-foreground: 0 0% 9%; 61 | --secondary: 0 0% 14.9%; 62 | --secondary-foreground: 0 0% 98%; 63 | --muted: 0 0% 14.9%; 64 | --muted-foreground: 0 0% 63.9%; 65 | --accent: 0 0% 14.9%; 66 | --accent-foreground: 0 0% 98%; 67 | --destructive: 0 62.8% 30.6%; 68 | --destructive-foreground: 0 0% 98%; 69 | --border: 0 0% 14.9%; 70 | --input: 0 0% 14.9%; 71 | --ring: 0 0% 83.1%; 72 | --chart-1: 220 70% 50%; 73 | --chart-2: 160 60% 45%; 74 | --chart-3: 30 80% 55%; 75 | --chart-4: 280 65% 60%; 76 | --chart-5: 340 75% 55%; 77 | } 78 | } 79 | 80 | @layer base { 81 | * { 82 | @reference border-border; 83 | } 84 | body { 85 | @reference bg-background text-foreground; 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import localFont from "next/font/local"; 3 | import "./globals.css"; 4 | 5 | const geistSans = localFont({ 6 | src: "./fonts/GeistVF.woff", 7 | variable: "--font-geist-sans", 8 | weight: "100 900", 9 | }); 10 | const geistMono = localFont({ 11 | src: "./fonts/GeistMonoVF.woff", 12 | variable: "--font-geist-mono", 13 | weight: "100 900", 14 | }); 15 | 16 | export const metadata: Metadata = { 17 | title: "Create Next App", 18 | description: "Generated by create next app", 19 | }; 20 | 21 | export default function RootLayout({ 22 | children, 23 | }: Readonly<{ 24 | children: React.ReactNode; 25 | }>) { 26 | return ( 27 | 28 | 31 | {children} 32 | 33 | 34 | ); 35 | } 36 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/app/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useState } from "react"; 4 | import { ChatSection } from "@/components/ChatSection"; 5 | import { PreviewSection } from "@/components/PreviewSection"; 6 | import { Polygon } from "@/components/PolygonDrawer"; 7 | 8 | export default function Component() { 9 | const [uploadedFile, setUploadedFile] = useState(null); 10 | const handleFileUpload = (file: string) => setUploadedFile(file); 11 | 12 | const [uploadedImage, setUploadedMedia] = useState(null); 13 | const handleMediaUpload = (image: string) => setUploadedMedia(image); 14 | 15 | const [uploadedResult, setUploadedResult] = useState(null); 16 | const handleResultUpload = (result: string) => setUploadedResult(result); 17 | 18 | const [polygons, setPolygons] = useState([]); 19 | const handlePolygonChange = (polygons: Polygon[]) => setPolygons(polygons); 20 | 21 | return ( 22 |
23 | 32 | 38 |
39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/components/GroupedVisualizer.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useEffect, useMemo, useState } from "react"; 4 | import { ChevronLeft, ChevronRight, ChevronDown } from "lucide-react"; 5 | import { Detection, DetectionItem } from "./types"; 6 | import { ImageVisualizer } from "./ImageVisualizer"; // Your image canvas component 7 | import { VideoVisualizer } from "./VideoVisualizer"; // Your video visualizer component 8 | 9 | 10 | // --- Group Type --- 11 | // Each group contains all items that share the same function_name. 12 | interface GroupedDetection { 13 | functionName: string; 14 | items: DetectionItem[]; 15 | } 16 | 17 | interface VisualizerProps { 18 | detectionResults: DetectionItem[]; 19 | onSubmit?: (functionName: string, boxThreshold: number) => void; 20 | } 21 | 22 | // --- GroupedVisualizer Component --- 23 | 24 | const GroupedVisualizer: React.FC = ({ 25 | detectionResults, 26 | onSubmit, 27 | }) => { 28 | // 1. Group detectionResults by function_name. 29 | const groups: GroupedDetection[] = useMemo(() => { 30 | const groupMap: Record = {}; 31 | detectionResults.forEach((item) => { 32 | const fn = item.request.function_name; 33 | if (!groupMap[fn]) { 34 | groupMap[fn] = []; 35 | } 36 | groupMap[fn].push(item); 37 | }); 38 | return Object.entries(groupMap).map(([functionName, items]) => ({ 39 | functionName, 40 | items, 41 | })); 42 | }, [detectionResults]); 43 | 44 | // 2. Maintain state for the currently active group (across different function_names) 45 | const [currentGroupIndex, setCurrentGroupIndex] = useState(0); 46 | 47 | // 3. Maintain state for the currently selected image index within each group. 48 | // The key is the group’s function name. 49 | const [selectedIndices, setSelectedIndices] = useState>( 50 | {} 51 | ); 52 | 53 | // When groups change, initialize the selectedIndices for each group to zero. 54 | useEffect(() => { 55 | const initialIndices: Record = {}; 56 | groups.forEach((group) => { 57 | initialIndices[group.functionName] = 0; 58 | }); 59 | setSelectedIndices(initialIndices); 60 | }, [groups]); 61 | 62 | // 4. Global threshold state. 63 | const [threshold, setThreshold] = useState(0.05); 64 | 65 | // 5. Determine the current group and current item. 66 | const currentGroup = groups[currentGroupIndex]; 67 | const currentItem = 68 | currentGroup.items[selectedIndices[currentGroup.functionName] ?? 0]; 69 | 70 | // 6. Navigation handlers: 71 | 72 | // For switching groups (different function_names) 73 | const handlePreviousGroup = () => { 74 | setCurrentGroupIndex((prev) => (prev - 1 + groups.length) % groups.length); 75 | }; 76 | 77 | const handleNextGroup = () => { 78 | setCurrentGroupIndex((prev) => (prev + 1) % groups.length); 79 | }; 80 | 81 | // For cycling images within the same group. 82 | const handleNextImageInGroup = () => { 83 | setSelectedIndices((prev) => { 84 | const currentIndex = prev[currentGroup.functionName] ?? 0; 85 | const nextIndex = (currentIndex + 1) % currentGroup.items.length; 86 | return { ...prev, [currentGroup.functionName]: nextIndex }; 87 | }); 88 | }; 89 | 90 | return ( 91 |
92 | {/* Group Info and Threshold */} 93 |
94 |

95 | Function: {currentGroup.functionName} 96 |

97 |

98 | Prompt:{" "} 99 | {Array.isArray(currentItem.request.prompts) 100 | ? currentItem.request.prompts.join(", ") 101 | : currentItem.request.prompts || currentItem.request.prompt} 102 |

103 |
104 | 107 | setThreshold(parseFloat(e.target.value))} 115 | className="w-full" 116 | /> 117 |
118 |
119 | 120 | {/* Visualization Area */} 121 |
122 | {/* Left/Right Buttons: Navigate between groups (different function_names) */} 123 | {groups.length > 1 && ( 124 | <> 125 | 132 | 139 | 140 | )} 141 | 142 | {/* Render image or video visualizer */} 143 | {currentItem.files[0][0] === "video" ? ( 144 | 151 | ) : ( 152 | 153 | )} 154 | 155 | {/* Down Arrow: Cycle within images/videos of the same group */} 156 | {currentGroup.items.length > 1 && ( 157 | 164 | )} 165 |
166 | 167 | {/* Navigation Info */} 168 |
169 |

170 | Tool {currentGroupIndex + 1} of {groups.length} — Tool Media{" "} 171 | {(selectedIndices[currentGroup.functionName] ?? 0) + 1} of{" "} 172 | {currentGroup.items.length} 173 |

174 |
175 | 176 | {/* Submit/Choose Button */} 177 |
178 | 186 |
187 |
188 | ); 189 | }; 190 | 191 | export { GroupedVisualizer }; 192 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/components/ImageVisualizer.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useEffect, useRef } from "react"; 4 | import { Detection, DetectionItem } from "./types"; 5 | import { drawBoundingBox } from "./utils"; 6 | 7 | // (Re-use your Detection and DetectionItem types and drawBoundingBox function here) 8 | 9 | interface ImageVisualizerProps { 10 | detectionItem: DetectionItem; 11 | threshold: number; 12 | } 13 | 14 | const ImageVisualizer: React.FC = ({ 15 | detectionItem, 16 | threshold, 17 | }) => { 18 | const canvasRef = useRef(null); 19 | 20 | useEffect(() => { 21 | // Skip if this is a video. 22 | if (detectionItem.files[0][0] === "video") return; 23 | const canvas = canvasRef.current; 24 | if (!canvas) return; 25 | const ctx = canvas.getContext("2d"); 26 | if (!ctx) return; 27 | 28 | const image = new Image(); 29 | image.onload = () => { 30 | canvas.width = image.width; 31 | canvas.height = image.height; 32 | ctx.clearRect(0, 0, canvas.width, canvas.height); 33 | ctx.drawImage(image, 0, 0); 34 | 35 | if (typeof detectionItem.response.data === "string") { 36 | // Draw response string (for text-based responses). 37 | const fontSize = Math.min(canvas.width, canvas.height) * 0.05; 38 | ctx.font = `${fontSize}px Arial`; 39 | 40 | // Text wrapping configuration 41 | const maxWidth = canvas.width - 40; // Padding on both sides 42 | const lineHeight = fontSize * 1.2; 43 | const padding = 20; 44 | 45 | // Wrap text into lines 46 | const words = detectionItem.response.data.split(' '); 47 | const lines: string[] = []; 48 | let currentLine = words[0]; 49 | 50 | for (let i = 1; i < words.length; i++) { 51 | const testLine = currentLine + ' ' + words[i]; 52 | const metrics = ctx.measureText(testLine); 53 | if (metrics.width > maxWidth) { 54 | lines.push(currentLine); 55 | currentLine = words[i]; 56 | } else { 57 | currentLine = testLine; 58 | } 59 | } 60 | lines.push(currentLine); 61 | 62 | // Calculate background height based on number of lines 63 | const bgHeight = (lines.length * lineHeight) + (padding * 2); 64 | 65 | // Draw background 66 | ctx.fillStyle = "rgba(0, 0, 0, 0.7)"; 67 | ctx.fillRect(10, 10, canvas.width - 20, bgHeight); 68 | 69 | // Draw text lines 70 | ctx.fillStyle = "white"; 71 | lines.forEach((line, i) => { 72 | ctx.fillText(line, padding, padding + (i + 1) * lineHeight); 73 | }); 74 | } else if (Array.isArray(detectionItem.response.data)) { 75 | // For images, assume response.data is an array of Detection. 76 | (detectionItem.response.data as Detection[]) 77 | .filter((detection) => detection.score >= threshold) 78 | .forEach((detection) => { 79 | // Draw mask if available (only for images). 
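// The mask is COCO-style run-length encoding: `counts` holds alternating run
// lengths of 0s and 1s over a column-major (Fortran-order) bitmap of
// `size` = [height, width], so the decoder below converts each linear index
// back to (x, y) before writing into the row-major ImageData buffer.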
80 | if ( 81 | detection.mask && 82 | detection.mask.counts && 83 | detection.mask.size 84 | ) { 85 | const [height, width] = detection.mask.size; 86 | const counts = detection.mask.counts; 87 | 88 | const tempCanvas = document.createElement("canvas"); 89 | tempCanvas.width = width; 90 | tempCanvas.height = height; 91 | const tmpCtx = tempCanvas.getContext("2d"); 92 | if (!tmpCtx) return; 93 | 94 | const bitmap = new Uint8Array(width * height); 95 | let pixelIndex = 0; 96 | let isOne = false; 97 | 98 | for (const count of counts) { 99 | for (let i = 0; i < count; i++) { 100 | if (pixelIndex < bitmap.length) { 101 | // Convert from row-major to column-major order. 102 | const x = Math.floor(pixelIndex / height); 103 | const y = pixelIndex % height; 104 | const newIndex = y * width + x; 105 | if (newIndex < bitmap.length) { 106 | bitmap[newIndex] = isOne ? 1 : 0; 107 | } 108 | pixelIndex++; 109 | } 110 | } 111 | isOne = !isOne; 112 | } 113 | 114 | const imageData = tmpCtx.createImageData(width, height); 115 | for (let i = 0; i < bitmap.length; i++) { 116 | const offset = i * 4; 117 | if (bitmap[i] === 1) { 118 | imageData.data[offset] = 255; 119 | imageData.data[offset + 1] = 0; 120 | imageData.data[offset + 2] = 0; 121 | imageData.data[offset + 3] = 170; 122 | } 123 | } 124 | tmpCtx.putImageData(imageData, 0, 0); 125 | 126 | ctx.save(); 127 | ctx.globalCompositeOperation = "source-over"; 128 | ctx.drawImage(tempCanvas, 0, 0, width, height); 129 | ctx.restore(); 130 | } 131 | 132 | // (If needed, draw the mask here as in your original code.) 133 | drawBoundingBox(ctx, detection); 134 | }); 135 | } 136 | }; 137 | image.src = `data:image/png;base64,${detectionItem.files[0][1]}`; 138 | }, [detectionItem, threshold]); 139 | 140 | return ; 141 | }; 142 | 143 | export { ImageVisualizer }; 144 | -------------------------------------------------------------------------------- /examples/chat/chat-app/src/components/PreviewSection.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useState } from "react"; 4 | import { ScrollArea } from "@/components/ui/scroll-area"; 5 | import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; 6 | import { Card } from "@/components/ui/card"; 7 | import { Prism as SyntaxHighlighter } from "react-syntax-highlighter"; 8 | import { gruvboxLight } from "react-syntax-highlighter/dist/esm/styles/prism"; 9 | import { PolygonDrawer, Polygon } from "@/components/PolygonDrawer"; 10 | 11 | interface PreviewSectionProps { 12 | uploadedMedia: string | null; 13 | uploadedFile: string | null; 14 | uploadedResult: string | null; 15 | onPolygonsChange: (polygons: Polygon[]) => void; 16 | } 17 | 18 | interface File { 19 | name: string; 20 | content: string; 21 | type: "code" | "image"; 22 | } 23 | 24 | export function PreviewSection({ 25 | uploadedMedia, 26 | uploadedFile, 27 | uploadedResult, 28 | onPolygonsChange, 29 | }: PreviewSectionProps) { 30 | return ( 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | Media 41 | 42 | 43 | 44 | 45 | 46 | 47 | Code 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | Result 57 | 58 | 59 | 60 | 61 | 62 |
63 | {uploadedMedia ? ( 64 | 68 | ) : ( 69 |
70 | 71 | 72 | 73 | 74 | 75 |

No media uploaded yet.

76 |

Upload media to begin annotation.

77 |
78 | )} 79 |
80 |
81 |
82 | 83 | 84 | 85 |
86 | {uploadedFile ? ( 87 | 100 | {uploadedFile || ""} 101 | 102 | ) : ( 103 |
104 | 105 | 106 | 107 | 108 |

No code uploaded yet.

109 |

Ask VisionAgent a question and wait for it to generate code.

110 |
111 | )} 112 |
113 |
114 |
115 | 116 | 117 | 118 |
119 | {uploadedResult ? ( 120 | Uploaded 125 | ) : ( 126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 |

No result uploaded yet.

134 |

Results will appear here after processing.

135 |
136 | )} 137 |
138 |
139 |
140 |
141 |
142 | ); 143 | } -------------------------------------------------------------------------------- /examples/chat/chat-app/src/components/VideoVisualizer.tsx: -------------------------------------------------------------------------------- 1 | import { Detection } from "./types"; 2 | import { useRef, useEffect } from "react"; 3 | import { drawBoundingBox } from "./utils"; 4 | 5 | interface VideoVisualizerProps { 6 | videoSrc: string; // Base64 video source (with data URI prefix) 7 | detections: Detection[][] | string; // Allow both array of detections or string response 8 | threshold: number; 9 | fps?: number; 10 | } 11 | 12 | // --- VideoVisualizer Component --- 13 | // This component renders a