├── .github └── workflows │ ├── jekyll-gh-pages.yml │ ├── mcp_release.yml │ ├── update_docs_context.yml │ ├── update_examples_context.yml │ ├── update_master_context.yml │ └── update_sdk_context.yml ├── .gitignore ├── .python-version ├── README.md ├── config.yaml ├── context ├── __init__.py ├── count_tokens.py ├── docs │ ├── crawl_coda_tree.py │ ├── doc_tree.json │ ├── docs_context.md │ ├── fragments │ │ ├── advanced_visual_search_pipelines_82.txt │ │ ├── callback_details_66.txt │ │ ├── collections_68.txt │ │ ├── custom_annotations_81.txt │ │ ├── deep_dive_into_prompt_engineering_mastering_video_scene_indexing_93.txt │ │ ├── guide_subtitles_73.txt │ │ ├── how_accurate_is_your_search_88.txt │ │ ├── index.txt │ │ ├── language_support_79.txt │ │ ├── playground_for_scene_extractions_83.txt │ │ ├── public_collections_102.txt │ │ ├── quick_start_guide_38.txt │ │ ├── ref_subtitle_styles_57.txt │ │ ├── scene_extraction_algorithms_84.txt │ │ ├── scene_level_metadata_smarter_video_search_retrieval_107.txt │ │ ├── semantic_search_89.txt │ │ └── video_indexing_guide_101.txt │ └── process_docs.py ├── examples │ ├── __init__.py │ ├── examples_context.md │ ├── fragments │ │ ├── Cleanup.txt │ │ ├── Multimodal_Quickstart.txt │ │ ├── Scene_Index_QuickStart.txt │ │ ├── Subtitle.txt │ │ ├── TextAsset.txt │ │ ├── VideoDB_Quickstart.txt │ │ └── scene_level_metadata_indexing.txt │ └── process_examples.py ├── instructions │ └── prompt.md ├── llms-full.md ├── llms-full.txt ├── llms.md ├── llms.txt ├── merge_llms_full_txt.py ├── merge_llms_txt.py ├── prompts │ ├── custom_1.txt │ ├── custom_2.txt │ ├── default_docs.txt │ ├── default_ipynb.txt │ ├── refine_docs.txt │ └── refine_ipynb.txt ├── sdk │ ├── context │ │ └── index.md │ └── sphinx_config │ │ ├── conf.py │ │ └── index.rst └── utils.py ├── modelcontextprotocol ├── .python-version ├── Dockerfile ├── README.md ├── pyproject.toml ├── smithery.yaml ├── uv.lock └── videodb_director_mcp │ ├── __init__.py │ ├── cli_commands.py │ ├── constants.py │ └── main.py ├── pyproject.toml ├── readme_shields.json ├── token_breakdown.png └── uv.lock /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: true 23 | 24 | jobs: 25 | # Build job 26 | build: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v4 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v5 33 | - name: Build with Jekyll 34 | uses: actions/jekyll-build-pages@v1 35 | with: 36 | source: ./ 37 | destination: ./_site 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v3 40 | 41 | # Deployment job 42 | deploy: 43 | environment: 44 | name: github-pages 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | runs-on: ubuntu-latest 47 | needs: build 48 | steps: 49 | - name: Deploy to GitHub Pages 50 | id: deployment 51 | uses: actions/deploy-pages@v4 52 | -------------------------------------------------------------------------------- /.github/workflows/mcp_release.yml: -------------------------------------------------------------------------------- 1 | name: MCP Release 2 | 3 | on: 4 | workflow_dispatch: {} 5 | 6 | jobs: 7 | python-build: 8 | name: Build for PyPi 9 | runs-on: ubuntu-latest 10 | environment: pypi 11 | 12 | defaults: 13 | run: 14 | working-directory: ./modelcontextprotocol 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.12" 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m venv venv 27 | source venv/bin/activate 28 | python -m pip install --upgrade pip build twine 29 | 30 | - name: Build package 31 | run: | 32 | source venv/bin/activate 33 | rm -rf build dist *.egg-info 34 | python -m build 35 | python -m twine check dist/* 36 | 37 | - name: Upload artifact 38 | uses: actions/upload-artifact@v4 39 | with: 40 | name: release-dists 41 | path: ./modelcontextprotocol/dist/ 42 | 43 | python-release: 44 | name: Publish to PyPi 45 | runs-on: ubuntu-latest 46 | environment: pypi 47 | needs: 48 | - python-build 49 | 50 | defaults: 51 | run: 52 | working-directory: ./modelcontextprotocol 53 | 54 | permissions: 55 | id-token: write 56 | 57 | steps: 58 | - name: Retrieve distribution 59 | uses: actions/download-artifact@v4 60 | with: 61 | name: release-dists 62 | path: dist/ 63 | 64 | - name: Publish package distributions to PyPI 65 | uses: pypa/gh-action-pypi-publish@release/v1 66 | -------------------------------------------------------------------------------- /.github/workflows/update_docs_context.yml: -------------------------------------------------------------------------------- 1 | name: Update Docs Context 2 | 3 | on: 4 | workflow_dispatch: # Manually triggered via GitHub Actions UI 5 | 6 | jobs: 7 | scrape-doc-tree: 8 | runs-on: ubuntu-latest 9 | 10 | permissions: 11 | contents: write 12 | 13 | steps: 14 | - name: Checkout Repository 15 | uses: actions/checkout@v4 16 | 17 | - name: Install yq 18 | run: | 19 | sudo apt-get update 20 | sudo apt-get install -y yq 21 | 22 | - name: Parse Config for Doc Tree Scraper 23 | id: config 24 | run: | 25 | SCRIPT=$(yq '.docs_context.doc_tree.scrape_config.script' config.yaml) 26 | OUTPUT=$(yq '.docs_context.doc_tree.scrape_config.output' config.yaml) 27 | URL=$(yq '.docs_context.doc_tree.scrape_config.url' config.yaml) 28 | SELECTOR=$(yq '.docs_context.doc_tree.scrape_config.selector' config.yaml) 29 | SELECTOR_VALUE=$(yq '.docs_context.doc_tree.scrape_config.selector_value' config.yaml) 30 | echo "script=$SCRIPT" >> $GITHUB_OUTPUT 31 | echo "output=$OUTPUT" >> $GITHUB_OUTPUT 32 | echo "url=$URL" >> $GITHUB_OUTPUT 33 
| echo "selector=$SELECTOR" >> $GITHUB_OUTPUT 34 | echo "selector_value=$SELECTOR_VALUE" >> $GITHUB_OUTPUT 35 | 36 | - name: Set up Python 37 | uses: actions/setup-python@v4 38 | with: 39 | python-version: "3.9" 40 | 41 | - name: Create Virtual Environment and Install Dependencies 42 | run: | 43 | python -m venv venv 44 | source venv/bin/activate 45 | pip install --upgrade pip 46 | # Install dependencies from your pyproject.toml (assumed at repo root) 47 | pip install . 48 | 49 | - name: Run Doc Tree Scraper 50 | run: | 51 | source venv/bin/activate 52 | python ${{ steps.config.outputs.script }} ${{ steps.config.outputs.output }} \ 53 | --url ${{ steps.config.outputs.url }} \ 54 | --selector ${{ steps.config.outputs.selector }} \ 55 | --selector-value ${{ steps.config.outputs.selector_value }} 56 | 57 | - name: Commit and Push Doc Tree 58 | run: | 59 | git config --global user.name "github-actions[bot]" 60 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 61 | # Add the output file specified in config.yaml. 62 | git add . 63 | git commit -m "Update doc tree with latest changes" || echo "No changes to commit." 64 | git push 65 | 66 | build-docs-context: 67 | needs: scrape-doc-tree 68 | runs-on: ubuntu-latest 69 | permissions: 70 | contents: write 71 | pull-requests: write 72 | env: 73 | PROJECT_ID: ${{ secrets.PROJECT_ID }} 74 | GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} 75 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 76 | FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }} 77 | 78 | steps: 79 | - name: Checkout This Repo 80 | uses: actions/checkout@v4 81 | 82 | - name: Install yq 83 | run: | 84 | sudo apt-get update 85 | sudo apt-get install -y yq 86 | 87 | - name: Parse Configuration 88 | id: parse_config 89 | run: | 90 | # Read from config.yaml (examples_context) with fallback defaults 91 | CLONE_URL=$(yq '.docs_context.clone_url // "https://github.com/your-username/your-notebook-repo"' config.yaml) 92 | CLONE_DIR=$(yq '.docs_context.clone_dir // "examples_source"' config.yaml) 93 | SCRIPT_PATH=$(yq '.docs_context.script_path // "context_examples/process_examples.py"' config.yaml) 94 | BRANCH_NAME=$(yq '.docs_context.branch_name // "examples-md-update"' config.yaml) 95 | COMMIT_MESSAGE=$(yq '.docs_context.commit_message // "Add combined Markdown output for examples context"' config.yaml) 96 | OUTPUT_FILE=$(yq '.docs_context.output_file' config.yaml) 97 | OUTPUT_FRAGMENTS=$(yq '.docs_context.output_fragments' config.yaml) 98 | 99 | 100 | echo "clone_url=$CLONE_URL" >> $GITHUB_OUTPUT 101 | echo "clone_dir=$CLONE_DIR" >> $GITHUB_OUTPUT 102 | echo "script_path=$SCRIPT_PATH" >> $GITHUB_OUTPUT 103 | echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT 104 | echo "commit_message=$COMMIT_MESSAGE" >> $GITHUB_OUTPUT 105 | echo "output_file=$OUTPUT_FILE" >> $GITHUB_OUTPUT 106 | echo "output_fragments=$OUTPUT_FRAGMENTS" >> $GITHUB_OUTPUT 107 | 108 | - name: Set up Python 109 | uses: actions/setup-python@v4 110 | with: 111 | python-version: "3.9" 112 | 113 | - name: Create Virtual Environment 114 | run: | 115 | python -m venv venv 116 | source venv/bin/activate 117 | pip install --upgrade pip 118 | 119 | - name: Install Dependencies Using pyproject.toml 120 | run: | 121 | source venv/bin/activate 122 | # Installs dependencies as specified in your pyproject.toml (assumed to be at repo root) 123 | pip install . 
124 | 125 | - name: Run Docs Processing 126 | run: | 127 | source venv/bin/activate 128 | export PYTHONPATH=$PYTHONPATH:$(pwd) 129 | python ${{ steps.parse_config.outputs.script_path }} 130 | 131 | - name: Remove Cloned Repository 132 | run: rm -rf ${{ steps.parse_config.outputs.clone_dir }} 133 | 134 | - name: Commit and Push Combined MD 135 | run: | 136 | git config --global user.name "github-actions[bot]" 137 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 138 | git checkout -b ${{ steps.parse_config.outputs.branch_name }} 139 | git add ${{ steps.parse_config.outputs.output_file}} 140 | git add ${{ steps.parse_config.outputs.output_fragments }} 141 | git commit -m ${{ steps.parse_config.outputs.commit_message }} 142 | git push --force --set-upstream origin ${{ steps.parse_config.outputs.branch_name }} 143 | 144 | - name: Create Pull Request 145 | run: | 146 | gh pr create \ 147 | --base main \ 148 | --head ${{ steps.parse_config.outputs.branch_name }} \ 149 | --title ${{ steps.parse_config.outputs.commit_message }} \ 150 | --body "This PR adds the simplified Markdown output from the example notebooks." || true 151 | env: 152 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 153 | -------------------------------------------------------------------------------- /.github/workflows/update_examples_context.yml: -------------------------------------------------------------------------------- 1 | name: Update Examples Context 2 | 3 | on: 4 | workflow_dispatch: # Manually triggered via GitHub Actions UI 5 | repository_dispatch: 6 | types: [examples-context-update] 7 | 8 | permissions: 9 | contents: write 10 | pull-requests: write 11 | 12 | jobs: 13 | build-examples: 14 | runs-on: ubuntu-latest 15 | 16 | 17 | env: 18 | PROJECT_ID: ${{ secrets.PROJECT_ID }} 19 | GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} 20 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 21 | 22 | steps: 23 | - name: Checkout This Repo 24 | uses: actions/checkout@v4 25 | 26 | - name: Install yq 27 | run: | 28 | sudo apt-get update 29 | sudo apt-get install -y yq 30 | 31 | - name: Parse Configuration 32 | id: parse_config 33 | run: | 34 | # Read from config.yaml (examples_context) with fallback defaults 35 | CLONE_URL=$(yq '.examples_context.clone_url // "https://github.com/your-username/your-notebook-repo"' config.yaml) 36 | CLONE_DIR=$(yq '.examples_context.clone_dir // "examples_source"' config.yaml) 37 | SCRIPT_PATH=$(yq '.examples_context.script_path // "context_examples/process_examples.py"' config.yaml) 38 | BRANCH_NAME=$(yq '.examples_context.branch_name // "examples-md-update"' config.yaml) 39 | COMMIT_MESSAGE=$(yq '.examples_context.commit_message // "Add combined Markdown output for examples context"' config.yaml) 40 | OUTPUT_FILE=$(yq '.examples_context.output_file // "videodb_helper/context_examples/context/index_ipynb.md"' config.yaml) 41 | OUTPUT_FRAGMENTS=$(yq '.examples_context.output_fragments' config.yaml) 42 | 43 | 44 | echo "clone_url=$CLONE_URL" >> $GITHUB_OUTPUT 45 | echo "clone_dir=$CLONE_DIR" >> $GITHUB_OUTPUT 46 | echo "script_path=$SCRIPT_PATH" >> $GITHUB_OUTPUT 47 | echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT 48 | echo "commit_message=$COMMIT_MESSAGE" >> $GITHUB_OUTPUT 49 | echo "output_file=$OUTPUT_FILE" >> $GITHUB_OUTPUT 50 | echo "output_fragments=$OUTPUT_FRAGMENTS" >> $GITHUB_OUTPUT 51 | 52 | - name: Clone Examples Repo 53 | run: git clone ${{ steps.parse_config.outputs.clone_url }} ${{ steps.parse_config.outputs.clone_dir }} 54 | 55 | - name: Set up Python 56 | 
uses: actions/setup-python@v4 57 | with: 58 | python-version: "3.9" 59 | 60 | - name: Create Virtual Environment 61 | run: | 62 | python -m venv venv 63 | source venv/bin/activate 64 | pip install --upgrade pip 65 | 66 | - name: Install Dependencies Using pyproject.toml 67 | run: | 68 | source venv/bin/activate 69 | # Installs dependencies as specified in your pyproject.toml (assumed to be at repo root) 70 | pip install . 71 | 72 | - name: Run Examples Processing 73 | run: | 74 | source venv/bin/activate 75 | export PYTHONPATH=$PYTHONPATH:$(pwd) 76 | python ${{ steps.parse_config.outputs.script_path }} 77 | 78 | - name: Remove Cloned Repository 79 | run: rm -rf ${{ steps.parse_config.outputs.clone_dir }} 80 | 81 | - name: Commit and Push Combined MD 82 | run: | 83 | git config --global user.name "github-actions[bot]" 84 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 85 | git checkout -b ${{ steps.parse_config.outputs.branch_name }} 86 | git add ${{ steps.parse_config.outputs.output_file }} 87 | git add ${{ steps.parse_config.outputs.output_fragments }} 88 | git commit -m ${{ steps.parse_config.outputs.commit_message }} 89 | git push --force --set-upstream origin ${{ steps.parse_config.outputs.branch_name }} 90 | 91 | - name: Create Pull Request 92 | run: | 93 | gh pr create \ 94 | --base main \ 95 | --head ${{ steps.parse_config.outputs.branch_name }} \ 96 | --title ${{ steps.parse_config.outputs.commit_message }} \ 97 | --body "This PR adds the simplified Markdown output from the example notebooks." || true 98 | env: 99 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 100 | -------------------------------------------------------------------------------- /.github/workflows/update_master_context.yml: -------------------------------------------------------------------------------- 1 | name: Update Master Markdown File and Tag 2 | 3 | on: 4 | push: 5 | # Trigger when any markdown file is changed (adjust paths as needed) 6 | paths: 7 | - "**/*.md" 8 | workflow_dispatch: 9 | 10 | jobs: 11 | update-master: 12 | runs-on: ubuntu-latest 13 | 14 | permissions: 15 | contents: write 16 | 17 | steps: 18 | - name: Checkout Repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Install yq 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install -y yq 25 | 26 | - name: Parse Configuration 27 | id: parse_config 28 | run: | 29 | MERGE_SCRIPT_PATH=$(yq '.llms_full_txt_file.merge_script_path // "context_examples/process_examples.py"' config.yaml) 30 | COUNT_TOKENS_SCRIPT_PATH=$(yq '.token_count.script_path // "context_examples/process_examples.py"' config.yaml) 31 | 32 | echo "merge_script_path=$MERGE_SCRIPT_PATH" >> $GITHUB_OUTPUT 33 | echo "count_tokens_script_path=$COUNT_TOKENS_SCRIPT_PATH" >> $GITHUB_OUTPUT 34 | 35 | - name: Set up Python 36 | uses: actions/setup-python@v4 37 | with: 38 | python-version: "3.9" 39 | 40 | - name: Create Virtual Environment and Install Dependencies 41 | run: | 42 | python -m venv venv 43 | source venv/bin/activate 44 | pip install --upgrade pip 45 | pip install . 
46 | 47 | - name: Run Master File Generation Script 48 | run: | 49 | source venv/bin/activate 50 | export PYTHONPATH=$PYTHONPATH:$(pwd) 51 | python ${{ steps.parse_config.outputs.merge_script_path}} 52 | python ${{ steps.parse_config.outputs.count_tokens_script_path }} 53 | 54 | - name: Commit and Push Master File 55 | run: | 56 | git config --global user.name "github-actions[bot]" 57 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 58 | # Add the output file specified in config.yaml. 59 | git add . 60 | git commit -m "Update master file with latest changes" || echo "No changes to commit." 61 | git push 62 | 63 | - name: Create New Tag (Minor Version Bump) 64 | run: | 65 | # Fetch all tags 66 | git fetch --tags 67 | 68 | # List all tags that match v., sort descending by version 69 | LATEST_TAG=$(git tag -l 'v[0-9]*.[0-9]*' --sort=-v:refname | head -n 1) 70 | 71 | # If none found, default to v0.0 72 | if [ -z "$LATEST_TAG" ]; then 73 | LATEST_TAG="v0.0" 74 | fi 75 | 76 | echo "Latest matching tag: $LATEST_TAG" 77 | 78 | # Parse out major/minor from LATEST_TAG if it matches v#.#, else default to 0.0 79 | if [[ $LATEST_TAG =~ ^v([0-9]+)\.([0-9]+)$ ]]; then 80 | MAJOR="${BASH_REMATCH[1]}" 81 | MINOR="${BASH_REMATCH[2]}" 82 | else 83 | MAJOR=0 84 | MINOR=0 85 | fi 86 | 87 | # Start by incrementing the minor 88 | NEW_MINOR=$((MINOR+1)) 89 | 90 | # If v. already exists, keep incrementing until we find a free one 91 | while git rev-parse -q --verify "refs/tags/v${MAJOR}.${NEW_MINOR}" >/dev/null; do 92 | NEW_MINOR=$((NEW_MINOR+1)) 93 | done 94 | 95 | NEW_TAG="v${MAJOR}.${NEW_MINOR}" 96 | echo "Creating new tag: $NEW_TAG" 97 | 98 | git tag "$NEW_TAG" 99 | git push origin "$NEW_TAG" 100 | -------------------------------------------------------------------------------- /.github/workflows/update_sdk_context.yml: -------------------------------------------------------------------------------- 1 | name: Update SDK Context 2 | 3 | on: 4 | workflow_dispatch: # Manually triggered via GitHub Actions UI or via respository dispatch 5 | repository_dispatch: 6 | types: [sdk-context-update] 7 | 8 | jobs: 9 | build-docs: 10 | runs-on: ubuntu-latest 11 | 12 | permissions: 13 | contents: write 14 | pull-requests: write 15 | 16 | steps: 17 | - name: Checkout Repository 18 | uses: actions/checkout@v4 19 | 20 | - name: Install yq 21 | run: | 22 | sudo apt-get update 23 | sudo apt-get install -y yq 24 | 25 | - name: Parse Configuration 26 | id: parse_config 27 | run: | 28 | # Read from config.yaml (sdk_context) with fallback defaults 29 | CLONE_URL=$(yq '.sdk_context.clone_url // "https://github.com/video-db/videodb-python"' config.yaml) 30 | CLONE_DIR=$(yq '.sdk_context.clone_dir // "sdk_source"' config.yaml) 31 | SPHINX_CONFIG_DIR=$(yq '.sdk_context.sphinx_config_dir // "sphinx_config"' config.yaml) 32 | OUTPUT_DIR=$(yq '.sdk_context.output_dir // "sdk_build"' config.yaml) 33 | COMMIT_MESSAGE=$(yq '.sdk_context.commit_message // "Add Sphinx markdown build output (sdk_build)"' config.yaml) 34 | BRANCH_NAME=$(yq '.sdk_context.branch_name // "sdk-context-branch"' config.yaml) 35 | 36 | echo "clone_url=$CLONE_URL" >> $GITHUB_OUTPUT 37 | echo "clone_dir=$CLONE_DIR" >> $GITHUB_OUTPUT 38 | echo "sphinx_config_dir=$SPHINX_CONFIG_DIR" >> $GITHUB_OUTPUT 39 | echo "output_dir=$OUTPUT_DIR" >> $GITHUB_OUTPUT 40 | echo "commit_message=$COMMIT_MESSAGE" >> $GITHUB_OUTPUT 41 | echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT 42 | 43 | - name: Clone SDK Repository 44 | run: | 45 | git clone ${{ 
steps.parse_config.outputs.clone_url }} ${{ steps.parse_config.outputs.clone_dir }} 46 | 47 | - name: Set up Python 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: "3.9" 51 | 52 | - name: Create Virtual Environment 53 | run: | 54 | python -m venv venv 55 | source venv/bin/activate 56 | pip install --upgrade pip 57 | 58 | - name: Install Dependencies 59 | run: | 60 | source venv/bin/activate 61 | pip install -r ${{ steps.parse_config.outputs.clone_dir }}/requirements.txt 62 | pip install myst-parser sphinx sphinx-markdown-builder 63 | 64 | - name: Build Sphinx Docs 65 | run: | 66 | source venv/bin/activate 67 | sphinx-build -b markdown \ 68 | ${{ steps.parse_config.outputs.sphinx_config_dir }} \ 69 | ${{ steps.parse_config.outputs.output_dir }} 70 | 71 | - name: Remove Cloned Repository 72 | run: rm -rf ${{ steps.parse_config.outputs.clone_dir }} 73 | 74 | - name: Commit and Push SDK Build Folder 75 | run: | 76 | git config --global user.name "github-actions[bot]" 77 | git config --global user.email "github-actions[bot]@users.noreply.github.com" 78 | git checkout -b ${{ steps.parse_config.outputs.branch_name }} 79 | 80 | # Add only the generated output folder 81 | git pull origin main 82 | git add ${{ steps.parse_config.outputs.output_dir }} 83 | git commit -m ${{ steps.parse_config.outputs.commit_message }} 84 | 85 | # Force push to new branch 86 | git push --force --set-upstream origin ${{ steps.parse_config.outputs.branch_name }} 87 | 88 | - name: Create Pull Request 89 | run: | 90 | gh pr create \ 91 | --base main \ 92 | --head ${{ steps.parse_config.outputs.branch_name }} \ 93 | --title ${{ steps.parse_config.outputs.commit_message }} \ 94 | --body "This PR adds the latest markdown build output from Sphinx." || true 95 | env: 96 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 97 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | .DS_Store 9 | **/.doctrees 10 | 11 | # Virtual environments 12 | .venv 13 | .env 14 | venv/ 15 | logs/ 16 | results/ 17 | runs/ 18 | 19 | 20 | context/examples/source 21 | context/sdk/source -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.9 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Latest Number][token-length-shield]][token-length-url] 2 | [![GitHub tag (latest SemVer)][tag-shield]][ tag-url] 3 | [![Stars][stars-shield]][stars-url] 4 | [![Issues][issues-shield]][issues-url] 5 | 6 | 7 |
8 |

9 | 10 | Logo 11 | 12 | 13 |

VideoDB Agent Toolkit

14 | 15 |

16 | AI Agent toolkit for VideoDB 17 |
18 | llms.txt >> 19 | llms-full.txt
20 | MCP 21 |
22 |

23 |

24 | 
25 | # VideoDB Agent Toolkit 
26 | 
27 | The VideoDB Agent Toolkit exposes VideoDB context to LLMs and agents. It enables integration with AI-driven IDEs like Cursor and coding agents like Claude Code. The toolkit automates context generation, maintenance, and discoverability: it auto-syncs SDK versions, docs, and examples, and is distributed through MCP and `llms.txt`. 
28 | 
29 | 
30 | ## 🚀 Quick Overview 
31 | 
32 | The toolkit offers context files designed for use with LLMs, structured around key components: 
33 | 
34 | `llms-full.txt` — Comprehensive context for deep integration. 
35 | 
36 | `llms.txt` — Lightweight metadata for quick discovery. 
37 | 
38 | `MCP (Model Context Protocol)` — A standardized protocol for exposing context and tools to AI agents. 
39 | 
40 | These components leverage automated workflows to ensure your AI applications always operate with accurate, up-to-date context. 
41 | 
42 | ## 📦 Toolkit Components 
43 | 
44 | ### 1. llms-full.txt ([View »](https://videodb.io/llms-full.txt)) 
45 | 
46 | --- 
47 | 
48 | `llms-full.txt` consolidates everything your LLM agent needs, including: 
49 | 
50 | - A comprehensive VideoDB overview. 
51 | 
52 | - Complete SDK usage instructions and documentation. 
53 | 
54 | - Detailed integration examples and best practices. 
55 | 
56 | **Real-world Examples:** 
57 | 
58 | - [VideoDB's Director](https://chat.videodb.io) `code-assistant` agent ([View Implementation](https://github.com/video-db/Director/blob/main/backend/director/agents/code_assitant.py)) 
59 | - [VideoDB's Discord Bot](https://discord.com/invite/py9P639jGz) to power customer support and community help 
60 | - Integrate `llms-full.txt` directly into your LLM-powered workflows, agent systems, or AI coding environments. 
61 | 
62 | ### 2. llms.txt ([View »](https://videodb.io/llms.txt)) 
63 | 
64 | --- 
65 | 
66 | A streamlined file following the [Answer.AI llms.txt proposal](https://github.com/answerdotai/llms-txt). Ideal for quick metadata exposure and LLM discovery. 
67 | 
68 | > **ℹ️ Recommendation**: Use `llms.txt` for lightweight discovery and metadata integration. Use `llms-full.txt` for complete functionality. 
69 | 
70 | ### 3. MCP (Model Context Protocol) 
71 | 
72 | The VideoDB MCP Server connects with the Director backend framework, providing a single tool for many workflows. For development, it can be installed and used via `uvx` for isolated environments. For more details on MCP, see the [VideoDB docs](https://docs.videodb.io/add-videodb-mcp-server-in-clients-108). 
73 | 
74 | **Install `uv`** 
75 | 
76 | Install `uv` first. 
77 | 
78 | For macOS/Linux: 
79 | ``` 
80 | curl -LsSf https://astral.sh/uv/install.sh | sh 
81 | ``` 
82 | For Windows: 
83 | 
84 | ``` 
85 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 
86 | ``` 
87 | 
88 | You can also visit the `uv` [installation guide](https://docs.astral.sh/uv/getting-started/installation) for more details. 
89 | 
90 | **Run the MCP Server** 
91 | 
92 | You can run the MCP server with `uvx` using the following command: 
93 | 
94 | ``` 
95 | uvx videodb-director-mcp --api-key=VIDEODB_API_KEY 
96 | ``` 
97 | 
98 | **Update the VideoDB Director MCP package** 
99 | 
100 | To ensure you're using the latest version of the MCP server with `uvx`, start by clearing the cache: 
101 | 
102 | ``` 
103 | uv cache clean 
104 | ``` 
105 | 
106 | This command removes any outdated cached packages of `videodb-director-mcp`, allowing `uvx` to fetch the most recent version.
107 | 
108 | If you always want to use the latest version of the MCP server, update your command as follows: 
109 | ``` 
110 | uvx videodb-director-mcp@latest --api-key=VIDEODB_API_KEY 
111 | ``` 
112 | 
113 | 
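**Configure the server in an MCP client**

Most MCP clients (Claude Desktop, Cursor, etc.) register servers through a JSON configuration file. The exact file location and key names vary by client, so treat the snippet below as an illustrative shape rather than a definitive config; `VIDEODB_API_KEY` is a placeholder for your actual key:

```
{
  "mcpServers": {
    "videodb-director": {
      "command": "uvx",
      "args": ["videodb-director-mcp", "--api-key=VIDEODB_API_KEY"]
    }
  }
}
```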
114 | 
115 | ## 🧠 Anatomy of LLM Context Files 
116 | 
117 | LLM context files in VideoDB are modular, automatically generated, and continuously updated from multiple sources: 
118 | 
119 | ### 🧩 Modular Structure: 
120 | 
121 | - **Instructions** — Best practices and prompt guidelines [View »](https://github.com/video-db/agent-toolkit/blob/main/context/instructions/prompt.md) 
122 | 
123 | - **SDK Context** — SDK structure, classes, and interface definitions [View »](https://github.com/video-db/agent-toolkit/blob/main/context/sdk/context/index.md) 
124 | 
125 | - **Docs Context** — Summarized product documentation [View »](https://github.com/video-db/agent-toolkit/blob/main/context/docs/docs_context.md) 
126 | 
127 | - **Examples Context** — Real-world notebook examples [View »](https://github.com/video-db/agent-toolkit/blob/main/context/examples/examples_context.md) 
128 | ![Token Breakdown](token_breakdown.png) 
129 | 
130 | 
131 | 
132 | ### Automated Maintenance: 
133 | - Managed through GitHub Actions for automated updates. 
134 | - Triggered by changes to SDK repositories, documentation, or examples. 
135 | - Maintained centrally via a [`config.yaml`](https://github.com/video-db/agent-toolkit/blob/readme-refactor/config.yaml) file. 
136 | 
137 | --- 
138 | 
139 | ## 🛠️ Automation with GitHub Actions 
140 | 
141 | Automatic context generation ensures your applications always have the latest information: 
142 | 
143 | ### 🔹 SDK Context Workflow ([View](https://github.com/video-db/agent-toolkit/blob/main/.github/workflows/update_sdk_context.yml)) 
144 | - **Automatically generates documentation** from SDK repo updates. 
145 | - Uses [Sphinx](https://www.sphinx-doc.org/en/master/) for Python SDKs. 
146 | 
147 | ### 🔹 Docs Context Workflow ([View](https://github.com/video-db/agent-toolkit/blob/main/.github/workflows/update_docs_context.yml)) 
148 | - **Scrapes and summarizes documentation** using [FireCrawl](https://www.firecrawl.dev/) and LLM-powered summarization. 
149 | 
150 | ### 🔹 Examples Context Workflow ([View](https://github.com/video-db/agent-toolkit/blob/main/.github/workflows/update_examples_context.yml)) 
151 | - Converts and summarizes notebooks into practical context examples. 
152 | 
153 | ### 🔹 Master Context Workflow ([View](https://github.com/video-db/agent-toolkit/blob/main/.github/workflows/update_master_context.yml)) 
154 | - Combines all sub-components into a unified `llms-full.txt`. 
155 | - Generates a standards-compliant `llms.txt`. 
156 | - Updates documentation with token statistics for transparency. 
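The SDK and Examples workflows can also be fired from an upstream repository via `repository_dispatch` (event types `sdk-context-update` and `examples-context-update`, as declared in the workflow files). As a minimal sketch using GitHub's repository-dispatch REST endpoint (the `GITHUB_TOKEN` here is a placeholder for a token with write access to this repo):

```
curl -X POST \
  -H "Accept: application/vnd.github+json" \
  -H "Authorization: Bearer $GITHUB_TOKEN" \
  https://api.github.com/repos/video-db/agent-toolkit/dispatches \
  -d '{"event_type": "sdk-context-update"}'
```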
157 | 158 | --- 159 | 160 | 161 | ## 🛠️ Customization via `config.yaml` 162 | 163 | The [`config.yaml`](https://github.com/video-db/agent-toolkit/blob/readme-refactor/config.yaml) file centralizes all configurations, allowing easy customization: 164 | 165 | - **Inclusion & Exclusion Patterns** for documentation and notebook processing 166 | - **Custom LLM Prompts** for precise summarization tailored to each document type 167 | - **Layout Configuration** for combining context components seamlessly 168 | 169 | `config.yaml` > `llms_full_txt_file` defines how `llms-full.txt` is assembled: 170 | 171 | ```yaml 172 | llms_full_txt_file: 173 | input_files: 174 | - name: Instructions 175 | file_path: "context/instructions/prompt.md" 176 | - name: SDK Context 177 | file_path: "context/sdk/context/index.md" 178 | - name: Docs Context 179 | file_path: "context/docs/docs_context.md" 180 | - name: Examples Context 181 | file_path: "context/examples/examples_context.md" 182 | output_files: 183 | - name: llms_full_txt 184 | file_path: "context/llms-full.txt" 185 | - name: llms_full_md 186 | file_path: "context/llms-full.md" 187 | layout: | 188 | {{FILE1}} 189 | 190 | {{FILE2}} 191 | 192 | {{FILE3}} 193 | 194 | {{FILE4}} 195 | 196 | ``` 197 | 198 | ## 💡 Best Practices for Context-Driven Development 199 | 200 | - **Automate Context Updates:** Leverage GitHub Actions to maintain accuracy. 201 | - **Tailored Summaries:** Use custom LLM prompts to ensure context relevance. 202 | - **Seamless Integration:** Continuously integrate with existing LLM agents or IDEs. 203 | 204 | By following these practices, you ensure your AI applications have reliable, relevant, and up-to-date context—critical for effective agent performance and developer productivity. 205 | 206 | --- 207 | 208 | ## 🚀 Get Started 209 | 210 | Clone the toolkit repository and follow the setup instructions in [`config.yaml`](https://github.com/video-db/agent-toolkit/blob/readme-refactor/config.yaml) to start integrating VideoDB contexts into your LLM-powered applications today. 
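To see the assembly logic at a glance, here is a minimal sketch of how the `layout` placeholders from the `llms_full_txt_file` section above can be substituted. This is a simplified, hypothetical stand-in for `context/merge_llms_full_txt.py`, not the script itself:

```python
import yaml

# Read the llms_full_txt_file section of the central config.
with open("config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)["llms_full_txt_file"]

# Replace {{FILE1}}..{{FILEn}} in the layout with each input file's contents.
merged = cfg["layout"]
for i, item in enumerate(cfg["input_files"], start=1):
    with open(item["file_path"], "r", encoding="utf-8") as f:
        merged = merged.replace(f"{{{{FILE{i}}}}}", f.read())

# Write the merged context to every configured output (llms-full.txt / llms-full.md).
for out in cfg["output_files"]:
    with open(out["file_path"], "w", encoding="utf-8") as f:
        f.write(merged)
```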
211 | 212 | **Explore further:** 213 | - [VideoDB SDK](https://github.com/video-db/videodb-python) 214 | - [Documentation](https://docs.videodb.io) 215 | - [Cookbook Examples](https://github.com/video-db/videodb-cookbook) 216 | 217 | --- 218 | 219 | 220 | 221 | [token-length-shield]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/video-db/agent-toolkit/refs/heads/main/readme_shields.json&style=for-the-badge 222 | [token-length-url]: https://github.com/video-db/agent-toolkit/blob/main/token_breakdown.png 223 | [tag-shield]: https://img.shields.io/github/v/tag/video-db/agent-toolkit?style=for-the-badge 224 | [tag-url]: https://github.com/video-db/agent-toolkit/tags 225 | [stars-shield]: https://img.shields.io/github/stars/video-db/agent-toolkit.svg?style=for-the-badge 226 | [stars-url]: https://github.com/video-db/agent-toolkit/stargazers 227 | [issues-shield]: https://img.shields.io/github/issues/video-db/agent-toolkit.svg?style=for-the-badge 228 | [issues-url]: https://github.com/video-db/agent-toolkit/issues 229 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | sdk_context: 2 | clone_url: "https://github.com/video-db/videodb-python" 3 | clone_dir: "context/sdk/source" 4 | sphinx_config_dir: "context/sdk/sphinx_config" 5 | output_dir: "context/sdk/context" 6 | commit_message: "Add Sphinx markdown build output (sdk_build)" 7 | branch_name: "sdk-context-branch" 8 | 9 | docs_context: 10 | doc_tree: 11 | scrape_config: 12 | script: "context/docs/crawl_coda_tree.py" 13 | output: "context/docs/doc_tree.json" 14 | url: "https://docs.videodb.io" 15 | selector: "data-coda-ui-id" 16 | selector_value: "page-list" 17 | include: 18 | # - "Welcome to VideoDB Docs" 19 | - "Quick Start Guide" 20 | # - "Visual Search and Indexing/*" 21 | prompts: 22 | prompt_folder: "context/prompts" 23 | default_prompt: "default_docs.txt" 24 | base_url: "https://docs.videodb.io" 25 | tree_file: "context/docs/doc_tree.json" 26 | output_file: "context/docs/docs_context.md" 27 | output_fragments: "context/docs/fragments" 28 | script_path: "context/docs/process_docs.py" # New field for script path 29 | branch_name: "docs-context-update" 30 | commit_message: "Add combined Markdown output for docs context" 31 | llm: "gemini" 32 | 33 | examples_context: 34 | clone_url: "https://github.com/video-db/videodb-cookbook" 35 | clone_dir: "context/examples/source" 36 | include: 37 | - "quickstart/*" 38 | - "guides/*.ipynb" 39 | exclude: 40 | - "guides/VideoDB_Search_and_Evaluation.ipynb" 41 | prompts: 42 | prompt_folder: "context/prompts" 43 | default_prompt: "default_ipynb.txt" 44 | custom_prompts: 45 | - pattern: "quickstart/Multimodal_Quickstart.ipynb" 46 | prompt: "custom_2.txt" 47 | output_file: "context/examples/examples_context.md" 48 | output_fragments: "context/examples/fragments" 49 | script_path: "context/examples/process_examples.py" # New field for script path 50 | branch_name: "examples-context-update" 51 | commit_message: "Add combined Markdown output for examples context" 52 | 53 | llms_full_txt_file: 54 | merge_script_path: "context/merge_llms_full_txt.py" 55 | input_files: 56 | - name: Instructions 57 | file_path: "context/instructions/prompt.md" 58 | - name: SDK Context 59 | file_path: "context/sdk/context/index.md" 60 | - name: Docs Context 61 | file_path: "context/docs/docs_context.md" 62 | - name: Examples Context 63 | file_path: 
"context/examples/examples_context.md" 64 | output_files: 65 | - name: llms_full_txt 66 | file_path: "context/llms-full.txt" 67 | - name: llms_full_md 68 | file_path: "context/llms-full.md" 69 | layout: | 70 | {{FILE1}} 71 | 72 | {{FILE2}} 73 | 74 | {{FILE3}} 75 | 76 | {{FILE4}} 77 | 78 | llms_txt_file: 79 | merge_script_path: "context/merge_llms_txt.py" 80 | input_files: 81 | - name: Instructions 82 | file_path: "context/instructions/prompt.md" 83 | - name: SDK Context 84 | file_path: "context/sdk/context/index.md" 85 | - name: Docs Context 86 | folder_path: "context/docs/fragments" 87 | - name: Examples Context 88 | folder_path: "context/examples/fragments" 89 | output_files: 90 | - name: llms_txt 91 | file_path: "context/llms.txt" 92 | - name: llms_md 93 | file_path: "context/llms.md" 94 | layout: | 95 | {{INPUT1}} 96 | 97 | {{INPUT2}} 98 | 99 | {{INPUT3}} 100 | 101 | {{INPUT4}} 102 | 103 | token_count: 104 | script_path: "context/count_tokens.py" 105 | tiktoken_encoding_model: "gpt-4" 106 | token_breakdown_file: "token_breakdown.png" 107 | readme_shields_file: "readme_shields.json" 108 | -------------------------------------------------------------------------------- /context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/video-db/agent-toolkit/030bba81cbe63fdd1728796c50ffe9d750daad31/context/__init__.py -------------------------------------------------------------------------------- /context/count_tokens.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | import yaml 3 | import matplotlib.pyplot as plt 4 | import json 5 | 6 | 7 | def load_config_yaml(): 8 | with open("config.yaml", "r", encoding="utf-8") as f: 9 | return yaml.safe_load(f) 10 | 11 | 12 | def format_k(num): 13 | if num >= 1000: 14 | return f"{num / 1000:.1f}k" 15 | else: 16 | return str(num) 17 | 18 | 19 | yml = load_config_yaml() 20 | master_config = yml.get("llms_full_txt_file") 21 | tkn_config = yml.get("token_count") 22 | 23 | tkn_encoding_model = tkn_config.get("tiktoken_encoding_model") 24 | sub_component_files = master_config.get("input_files") 25 | final_files = master_config.get("output_files") 26 | final_file = next((obj for obj in final_files if obj["name"] == "llms_full_txt"), None) 27 | 28 | 29 | sub_component_files_result = [] 30 | final_file_result = {} 31 | 32 | 33 | def count_token(text, model="gpt-4"): 34 | enc = tiktoken.encoding_for_model(model) 35 | return len(enc.encode(text)) 36 | 37 | 38 | for file in sub_component_files: 39 | with open(file.get("file_path"), "r") as f: 40 | num_tokens = count_token(f.read(), tkn_encoding_model) 41 | sub_component_files_result.append( 42 | {"name": file.get("name"), "tokens": num_tokens} 43 | ) 44 | 45 | 46 | # Prepare data for plotting 47 | labels = [item["name"] for item in sub_component_files_result] 48 | tokens = [item["tokens"] for item in sub_component_files_result] 49 | 50 | # Build custom labels with k-format 51 | custom_labels = [ 52 | f"{item['name']} ({format_k(item['tokens'])})" 53 | for item in sub_component_files_result 54 | ] 55 | 56 | # Choose some distinct colors for each slice (adjust as needed) 57 | colors = ["#db6430", "#46729F", "#C678DD", "#98C379"] 58 | 59 | # Create figure and axes with transparent background 60 | fig, ax = plt.subplots(figsize=(6, 6), facecolor="none") 61 | ax.set_facecolor("none") # Transparent Axes background 62 | 63 | # Plot the pie chart 64 | wedges, text_labels, pct_texts = ax.pie( 
65 | tokens, 66 | colors=colors, 67 | startangle=140, 68 | # Move labels slightly away from the center to reduce overlap 69 | labeldistance=1.1, 70 | # Show % inside slices 71 | autopct="%1.1f%%", 72 | pctdistance=0.7, 73 | wedgeprops={"edgecolor": "white"}, # White edge lines 74 | ) 75 | 76 | # Update label text and color 77 | for i, txt in enumerate(text_labels): 78 | txt.set_text(custom_labels[i]) 79 | # Use a neutral gray so text shows up on both dark & light backgrounds 80 | txt.set_color("#999") 81 | 82 | # Update the color of the percentage text inside slices 83 | for pct in pct_texts: 84 | pct.set_color("white") # or a different color if you prefer 85 | 86 | # Ensure the pie is a circle 87 | ax.axis("equal") 88 | 89 | # Tight layout to reduce clipping 90 | plt.tight_layout() 91 | 92 | # Save the figure with a transparent background 93 | plt.savefig(tkn_config.get("token_breakdown_file"), transparent=True, dpi=300) 94 | 95 | with open(final_file.get("file_path"), "r") as f: 96 | final_token_count = count_token(f.read()) 97 | 98 | shields_data = { 99 | "schemaVersion": 1, 100 | "label": "llms-full.txt token length", 101 | "message": str(format_k(final_token_count)), 102 | "color": "blue", 103 | } 104 | 105 | with open(tkn_config.get("readme_shields_file"), "w") as json_file: 106 | json.dump(shields_data, json_file, indent=4) 107 | -------------------------------------------------------------------------------- /context/docs/crawl_coda_tree.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import json 5 | import sys 6 | 7 | # Constants 8 | DEFAULT_URL = "https://docs.videodb.io" 9 | HTML_PARSER = "html.parser" 10 | DEFAULT_SELECTOR = "data-coda-ui-id" 11 | DEFAULT_SELECTOR_VALUE = ( 12 | "page-list" # Default attribute value to locate the parent element 13 | ) 14 | 15 | 16 | def find_a_tags_with_depth(parent_tag, depth=0): 17 | """ 18 | Recursively find all tags within a parent tag and track their depth level. 19 | """ 20 | results = [] 21 | for child in parent_tag.find_all(recursive=False): # Iterate over direct children 22 | if child.name == "a": 23 | results.append((child, depth)) # Store tag with its depth 24 | results.extend(find_a_tags_with_depth(child, depth + 1)) # Recurse deeper 25 | return results 26 | 27 | 28 | def list_to_nested_json(data): 29 | """ 30 | Convert a list of tuples (element, depth) into a nested JSON-like structure, 31 | where items with the smallest depth are at the top level and items of the same 32 | depth become siblings. 33 | 34 | Parameters: 35 | data (list of tuple): Each tuple is (element, depth) 36 | 37 | Returns: 38 | list: A list of nested dictionaries representing the JSON structure. 39 | """ 40 | result = [] 41 | stack = [] 42 | 43 | for element, depth in data: 44 | node = { 45 | "element": element.get_text(strip=True), 46 | "href": element.get("href"), 47 | "children": [], 48 | } 49 | 50 | # Adjust the stack to match the current depth 51 | while stack and stack[-1][1] >= depth: 52 | stack.pop() 53 | 54 | if stack: 55 | parent_node, _ = stack[-1] 56 | parent_node["children"].append(node) 57 | else: 58 | result.append(node) 59 | 60 | stack.append((node, depth)) 61 | 62 | return result 63 | 64 | 65 | def fetch_and_parse(url): 66 | """ 67 | Fetch the webpage content from the given URL and parse it with BeautifulSoup. 
68 | """ 69 | response = requests.get(url) 70 | if response.status_code == 200: 71 | return BeautifulSoup(response.text, HTML_PARSER) 72 | else: 73 | raise Exception( 74 | f"Failed to fetch the webpage. Status code: {response.status_code}" 75 | ) 76 | 77 | 78 | def scrape_and_save( 79 | output_file, 80 | url=DEFAULT_URL, 81 | selector=DEFAULT_SELECTOR, 82 | selector_value=DEFAULT_SELECTOR_VALUE, 83 | ): 84 | """ 85 | Scrape the webpage, convert tags into a nested JSON structure, and save it to a file. 86 | 87 | Parameters: 88 | output_file (str): Path to the output JSON file. 89 | url (str): URL of the docs page to scrape. 90 | selector (str): HTML attribute name to locate the parent element. 91 | selector_value (str): Value for the attribute selector. 92 | """ 93 | soup = fetch_and_parse(url) 94 | parent_tag = soup.find(attrs={selector: selector_value}) 95 | 96 | if not parent_tag: 97 | raise Exception(f"Element with {selector}='{selector_value}' not found.") 98 | 99 | a_tags_with_levels = find_a_tags_with_depth(parent_tag) 100 | nested_json = list_to_nested_json(a_tags_with_levels) 101 | 102 | with open(output_file, "w", encoding="utf-8") as f: 103 | json.dump(nested_json, f, indent=4) 104 | 105 | 106 | def main(): 107 | parser = argparse.ArgumentParser( 108 | description="Scrape a webpage, convert tags into a nested JSON structure, and save it to a file." 109 | ) 110 | parser.add_argument("output", help="Path to the output JSON file") 111 | parser.add_argument( 112 | "--url", default=DEFAULT_URL, help="URL of the docs page (default: %(default)s)" 113 | ) 114 | parser.add_argument( 115 | "--selector", 116 | default=DEFAULT_SELECTOR, 117 | help="Attribute selector to locate the parent element (default: %(default)s)", 118 | ) 119 | parser.add_argument( 120 | "--selector-value", 121 | default=DEFAULT_SELECTOR_VALUE, 122 | help="Value for the attribute selector (default: %(default)s)", 123 | ) 124 | args = parser.parse_args() 125 | 126 | try: 127 | scrape_and_save( 128 | args.output, 129 | url=args.url, 130 | selector=args.selector, 131 | selector_value=args.selector_value, 132 | ) 133 | except Exception as e: 134 | sys.exit(str(e)) 135 | 136 | 137 | if __name__ == "__main__": 138 | main() 139 | -------------------------------------------------------------------------------- /context/docs/docs_context.md: -------------------------------------------------------------------------------- 1 | # Quick Start Guide [Source Link](https://docs.videodb.io/quick-start-guide-38) 2 | 3 | VideoDB Documentation 4 | 5 | Pages 6 | 7 | [Welcome to VideoDB Docs](https://docs.videodb.io/) 8 | 9 | [Quick Start Guide](https://docs.videodb.io/quick-start-guide-38) 10 | 11 | [Visual Search and Indexing](https://docs.videodb.io/visual-search-and-indexing-80) 12 | 13 | [Multimodal Search](https://docs.videodb.io/multimodal-search-90) 14 | 15 | [Dynamic Video Streams](https://docs.videodb.io/dynamic-video-streams-44) 16 | 17 | [Director - Video Agent Framework](https://docs.videodb.io/director-video-agent-framework-98) 18 | 19 | [Open Source Tools](https://docs.videodb.io/open-source-tools-94) 20 | 21 | [Examples and Tutorials](https://docs.videodb.io/examples-and-tutorials-35) 22 | 23 | [Edge of Knowledge](https://docs.videodb.io/edge-of-knowledge-10) 24 | 25 | [Building World's First Video Database](https://docs.videodb.io/building-worlds-first-video-database-25) 26 | 27 | [Team](https://docs.videodb.io/team-46) 28 | 29 | [Customer Love](https://docs.videodb.io/customer-love-42) 30 | 31 | [Temp 
Doc](https://docs.videodb.io/temp-doc-54) 
32 | 
33 | # Quick Start Guide 
34 | 
35 | [How Accurate is Your Search?](https://docs.videodb.io/how-accurate-is-your-search-88) [Video Indexing Guide](https://docs.videodb.io/video-indexing-guide-101) [Semantic Search](https://docs.videodb.io/semantic-search-89) [Collections](https://docs.videodb.io/collections-68) [Public Collections](https://docs.videodb.io/public-collections-102) [Callback Details](https://docs.videodb.io/callback-details-66) [Ref: Subtitle Styles](https://docs.videodb.io/ref-subtitle-styles-57) [Language Support](https://docs.videodb.io/language-support-79) [Guide: Subtitles](https://docs.videodb.io/guide-subtitles-73) 
36 | 
37 | This notebook is designed to help you get started with VideoDB. Advanced concepts are linked throughout for a deeper dive. 
38 | 
39 | [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/video-db/videodb-cookbook/blob/main/quickstart/VideoDB%20Quickstart.ipynb) 
40 | 
41 | ## Setup 
42 | 
43 | ### 🔧 Installing VideoDB in your environment 
44 | 
45 | VideoDB is available as a 
46 | 
47 | [python package 📦](https://pypi.org/project/videodb) 
48 | 
49 | !pip install videodb 
50 | 
51 | ### 🔗 Setting Up a connection to db 
52 | 
53 | To connect to VideoDB, simply get the API key and create a connection. This can be done by either providing your VideoDB API key directly to the constructor or by setting the VIDEO\_DB\_API\_KEY environment variable with your API key. 
54 | 
55 | Get your API key from the 
56 | 
57 | [VideoDB Console](https://console.videodb.io/) 
58 | 
59 | . (Free for first 50 uploads, no credit card required) 🎉 
60 | 
61 | import videodb 
62 | 
63 | conn = videodb.connect(api\_key="YOUR\_API\_KEY") 
64 | 
65 | ## Working with Single Video 
66 | 
67 | ### ⬆️ Uploading a video 
68 | 
69 | Now that you have established a connection to VideoDB, you can upload your videos using conn.upload(). 
70 | 
71 | You can upload your media by a URL or from your local file system. 
72 | 
73 | The upload method returns a Video object. 
74 | 
75 | \# Upload a video by url 
76 | 
77 | video = conn.upload(url="https://www.youtube.com/watch?v=WDv4AWk0J3U") 
78 | 
79 | \# Upload a video from file system 
80 | 
81 | video\_f = conn.upload(file\_path="./my\_video.mp4") 
82 | 
83 | VideoDB simplifies your upload by supporting links from YouTube, S3, or any public URL with video. 
84 | 
85 | ### Pro Tip 
86 | 
87 | If you wish to upload only the audio from a video file, just specify it in the "media\_type" field. For instance, this is how you can obtain the audio from a YouTube video. 
88 | 
89 | from videodb import MediaType 
90 | 
91 | audio = conn.upload(url="https://youtu.be/IoDVfXFq5cU?si=BCU7ghvqY3YdCS78", media\_type=MediaType.audio) 
92 | 
93 | The types of media that can be uploaded are defined in the MediaType class. 
94 | 
95 | ### 📺 Viewing your video 
96 | 
97 | Your video is instantly available for viewing in 720p resolution ⚡️ 
98 | 
99 | Generate a streamable URL for the video using video.generate\_stream(). 
100 | 
101 | Preview the video using video.play(). This will open the video in your default browser/notebook. 
102 | 
103 | video.generate\_stream() 
104 | 
105 | video.play() 
106 | 
107 | Note: if you are viewing this notebook on GitHub, you won't be able to see the iframe player because of security restrictions. Please open the printed player link in your browser. 
108 | 
115 | ### ⛓️ Stream Specific Sections of videos 
116 | 
117 | You can easily clip specific sections of a video by passing a timeline of start and end sections. It accepts seconds. For example, here we are streaming only the first 10 seconds and then seconds 120 to 140 of a video. 
118 | 
119 | stream\_link = video.generate\_stream(timeline=\[(0,10),(120,140)\]) 
120 | 
121 | play\_stream(stream\_link) 
122 | 
123 | ### 🗂️ Indexing a Video 
124 | 
125 | To search bits inside a video, you first have to index the video. This is done by invoking the index function on the video object. VideoDB currently offers two types of indexes. 
126 | 
127 | index\_spoken\_words: Indexes spoken words in the video. It automatically generates the transcript and makes it ready for search. Check out 
128 | 
129 | [Language Support](https://docs.videodb.io/language-support-79) 
130 | 
131 | to index content in different languages. 
132 | 
133 | index\_scenes: Indexes visual concepts and events of the video. Perfect for security monitoring, drone, and other camera footage. Refer to 
134 | 
135 | [Visual Search and Indexing](https://docs.videodb.io/visual-search-and-indexing-80) 
136 | 
137 | Check out 
138 | 
139 | [Multimodal Search](https://docs.videodb.io/multimodal-search-90) 
140 | 
141 | to unlock multimodal search in your video library. 
142 | 
143 | ⏱️ Indexing may take some time for longer videos; structure it as a batch job with a callback in your application. Check 
144 | 
145 | [Callback Details](https://docs.videodb.io/callback-details-66) 
146 | 
147 | \# best for podcasts, e-learning, news, etc. 
148 | 
149 | video.index\_spoken\_words() 
150 | 
151 | \# best for camera feeds, moderation use cases, etc. 
152 | 
153 | video.index\_scenes(prompt="") 
154 | 
155 | Upcoming: 
156 | 
157 | Real-time feed indexing and real-time alerts. 
158 | 
159 | Domain-specific indexes like football, baseball, drone footage, cricket, etc. 
160 | 
161 | ### 🔍 Search Inside a Video 
162 | 
163 | Search the segments inside a video. While searching, you can choose the type of search and the type of index. VideoDB offers the following types of search: 
164 | 
165 | SearchType.semantic: Perfect for question-answer style queries. This is also the default type of search. 
166 | 
167 | SearchType.keyword: Matches the exact occurrence of the word or sentence you pass in the query parameter of the search function. Keyword search is only available for single videos. 
168 | 
169 | IndexType.scene: Searches the visual information of the video. Index the video using the index\_scenes function. 
170 | 
171 | IndexType.spoken\_word: Searches the spoken information of the video. Index the video using the index\_spoken\_words function. 
172 | 
173 | from videodb import SearchType 
174 | 
175 | from videodb import IndexType 
176 | 
177 | result = video.search(query ="What are the benefits of morning sunlight?", 
178 | 
179 | search\_type =SearchType.semantic, 
180 | 
181 | index\_type =IndexType.spoken\_word) 
182 | 
183 | result.play() 
184 | 
185 | Viewing Search Results: 
186 | 
187 | video.search() will return a SearchResults object, which contains the sections/shots of videos that semantically match your search query. 
188 | 
189 | result.get\_shots() \- Returns a list of Shot objects that matched the search query 
190 | 
191 | result.play() \- Returns a playable URL for the video (similar to video.play(); you can open this link in a browser, or embed it into your website using an iframe) 
192 | 
193 | ## RAG: Search Inside Multiple Videos 
194 | 
195 | VideoDB can store and search inside multiple videos with ease. By default, videos are uploaded to your default collection, and you are free to create and manage more collections; check out our 
196 | 
197 | [Collections](https://docs.videodb.io/collections-68) 
198 | 
199 | doc for more details. 
200 | 
201 | If you are an existing LlamaIndex user trying to build a RAG pipeline on your video data, you can also use the VideoDB retriever. Check out the llama docs ⬇️ 
202 | 
203 | [![](https://codaio.imgix.net/docs/_s5lUnUCIU/blobs/bl-LqObRP4v0A/7b9d7a007c857e3d084558d9276010d6e2101260ab78ea2dc871e4e1d2dbb358386b5f7d832921deb36cd820d65ed19f472132b189e46194f713725ee712a89368b08dfecb02c4e6bf3b90c6ab944a066ed3362a9b74309bd45495c9f221dcbea0e0b50d?auto=format%2Ccompress&fit=crop&w=227&h=51.416666666666686&dpr=2&crop=focalpoint&fp-x=0.5&fp-y=0.5113278791692889&fp-z=1)](https://docs.llamaindex.ai/en/stable/examples/retrievers/videodb_retriever.html) 
204 | 
205 | 🔄 Using Collection to upload multiple Videos 
206 | 
207 | \# Get the default collection 
208 | 
209 | coll = conn.get\_collection() 
210 | 
211 | \# Upload Videos to a collection 
212 | 
213 | coll.upload(url="https://www.youtube.com/watch?v=lsODSDmY4CY") 
214 | 
215 | coll.upload(url="https://www.youtube.com/watch?v=vZ4kOr38JhY") 
216 | 
217 | coll.upload(url="https://www.youtube.com/watch?v=uak\_dXHh6s4") 
218 | 
219 | conn.get\_collection() : Returns the default Collection object 
220 | 
221 | coll.get\_videos() : Returns a list of Video objects, all videos in the collection 
222 | 
223 | coll.get\_video(video\_id) : Returns the Video object for the given video\_id 
224 | 
225 | coll.delete\_video(video\_id) : Deletes the video from the collection 
226 | 
227 | ### 📂 Search inside multiple videos in a collection 
228 | 
229 | You can simply index all the videos in a collection and use the search method on the collection to find relevant results. Here we are indexing the spoken content of a collection and searching it. 
230 | 
231 | \# Index all videos in collection 
232 | 
233 | for video in coll.get\_videos(): 
234 | 
235 | video.index\_spoken\_words() 
236 | 
237 | Semantic Search in the collection 
238 | 
239 | \# search in the collection of videos 
240 | 
241 | results = coll.search(query ="What is Dopamine?") 
242 | 
243 | results.play() 
244 | 
245 | Let’s try one more search: 
246 | 
247 | results = coll.search(query = "What's the benefit of morning sunlight?") 
248 | 
249 | results.play() 
250 | 
251 | The result here has all the matching bits in a single video stream from your collection. You can use these results in your application right away. 
252 | 
253 | As you can see, VideoDB fundamentally removes the limitation of files and gives you the power to access and stream videos in a seamless way.
Stay tuned for exciting features in our upcoming version, and keep building awesome stuff with VideoDB 🤘 
254 | 
255 | ## 🌟 Explore more with Video object 
256 | 
257 | There are multiple methods available on a Video object that can be helpful for your use case. 
258 | 
259 | ### Access Transcript 
260 | 
261 | \# get text of the spoken content 
262 | 
263 | text\_json = video.get\_transcript() 
264 | 
265 | text = video.get\_transcript\_text() 
266 | 
267 | print(text) 
268 | 
269 | Add Subtitle to a video 
270 | 
271 | It instantly returns a new stream with subtitles added to the video. The subtitle function has many styling parameters, like font, size, and background color. Check 
272 | 
273 | [Ref: Subtitle Styles](https://docs.videodb.io/ref-subtitle-styles-57) 
274 | 
275 | and 
276 | 
277 | [Guide: Subtitles](https://docs.videodb.io/guide-subtitles-73) 
278 | 
279 | for details. 
280 | 
281 | new\_stream = video.add\_subtitle() 
282 | 
283 | play\_stream(new\_stream) 
284 | 
285 | Get Thumbnail of Video: 
286 | 
287 | video.generate\_thumbnail() : Returns a thumbnail image of the video. 
288 | 
289 | Delete a video: 
290 | 
291 | video.delete() : Deletes the video. 
292 | 
293 | 👉🏼 Check out 
294 | 
295 | [Dynamic Video Streams](https://docs.videodb.io/dynamic-video-streams-44) 
296 | 
297 | to understand how you can modify video streams in real-time. This opens doors for many use cases that were never possible with videos. ⚡️ 
298 | 
299 | 👉🏼 Check out more examples and tutorials 👉 
300 | 
301 | [Examples and Tutorials](https://docs.videodb.io/examples-and-tutorials-35) 
302 | 
303 | to explore what you can build with VideoDB 
304 | 
305 | Setup 
306 | 
307 | 🔧 Installing VideoDB in your environment 
308 | 
309 | 🔗 Setting Up a connection to db 
310 | 
311 | Working with Single Video 
312 | 
313 | ⬆️ Uploading a video 
314 | 
315 | Pro Tip 
316 | 
317 | 📺 Viewing your video 
318 | 
319 | ⛓️ Stream Specific Sections of videos 
320 | 
321 | 🗂️ Indexing a Video 
322 | 
323 | 🔍 Search Inside a Video 
324 | 
325 | RAG: Search Inside Multiple Videos 
326 | 
327 | 📂 Search inside multiple videos in a collection 
328 | 
329 | 🌟 Explore more with Video object 
330 | 
331 | Access Transcript 
332 | 
342 | 
343 | 
344 | --- 
345 | 
346 | 
-------------------------------------------------------------------------------- /context/docs/fragments/callback_details_66.txt: --------------------------------------------------------------------------------
1 | # Callback Details [Source Link](https://docs.videodb.io/callback-details-66) 
2 | 
3 | VideoDB Documentation 
4 | 
5 | Pages 
6 | 
7 | Welcome to VideoDB Docs 
8 | 
9 | Quick Start Guide 
10 | 
11 | How Accurate is Your Search? 
12 | 
13 | Video Indexing Guide 
14 | 
15 | Semantic Search 
16 | 
17 | Collections 
18 | 
19 | Public Collections 
20 | 
21 | Callback Details 
22 | 
23 | Ref: Subtitle Styles 
24 | 
25 | Language Support 
26 | 
27 | Guide: Subtitles 
28 | 
29 | Visual Search and Indexing 
30 | 
31 | Multimodal Search 
32 | 
33 | Dynamic Video Streams 
34 | 
35 | Director - Video Agent Framework 
36 | 
37 | Open Source Tools 
38 | 
39 | Examples and Tutorials 
40 | 
41 | Edge of Knowledge 
42 | 
43 | Building World's First Video Database 
44 | 
45 | Team 
46 | 
47 | Customer Love 
48 | 
49 | Temp Doc 
50 | 
51 | Quick Start Guide 
52 | 
53 | # Callback Details 
54 | 
55 | # Upload 
56 | 
57 | You can pass a callback URL in the upload function. Here are the details of the callback responses.
```python
coll.upload(url="S3_LINK_HERE", callback_url="")
```

### 👍🏼 Successful Video Upload

```json
{
  "success": true,
  "data": {
    "id": "m-**",
    "collection_id": "c-**",
    "name": "What is AWS? | Amazon Web Services",
    "extension": "mp4",
    "size": "13294233",
    "length": "191.146667",
    "stream_url": "https://stream.videodb.io/v3/published/xxx/xx/x.m3u8",
    "player_url": "https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/xxx/xx/x.m3u8"
  }
}
```

### 👍🏼 Successful Audio Upload

```json
{
  "success": true,
  "data": {
    "id": "a-**",
    "collection_id": "c-**",
    "name": "What is AWS? | Amazon Web Services",
    "extension": "mp3",
    "size": "13294233",
    "length": "191.146667"
  }
}
```

### 👍🏼 Successful Image Upload

```json
{
  "success": true,
  "data": {
    "id": "img-**",
    "collection_id": "c-**",
    "name": "What is AWS? | Amazon Web Services",
    "extension": "jpg",
    "size": "13294233"
  }
}
```

## Errors in Upload

### Corrupted file 👎🏻

If the uploaded file is corrupted.

### Invalid file 👎🏻

If the file is invalid or the wrong media_type is passed to the upload function.

The media_type values supported by the upload function are available in the MediaType class 👉🏼 ["video", "image", "audio"]

### Issue with Download 🔴

If the download link is incorrect or private, or our servers are not able to download the file from the link.

# Index spoken words

Indexing a video is an asynchronous job. We provide a progress bar in our Python SDK for developer experience, but it is only good for communicating progress in Jupyter or Colab notebooks.

When you move to production, you can use callbacks for your backend workflows. Pass a callback URL while calling the function.

### Successfully Indexed 👍🏼

### Error in Indexing job 👎🏻

# Index scenes

Like the other indexing operations, scene indexing is an async job. You can pass a callback to the function.

### 👍🏼 Successfully Indexed

### 👎🏻 Error in Indexing

# Extract Scenes

### 👍🏼 Successfully extracted

### 👎🏻 Error in Extracting

---

--------------------------------------------------------------------------------
/context/docs/fragments/how_accurate_is_your_search_88.txt:
--------------------------------------------------------------------------------

# How Accurate is Your Search? [Source Link](https://docs.videodb.io/how-accurate-is-your-search-88)
### Introduction

When you index your data and retrieve it with certain parameters, how do you measure the effectiveness of your search? This is where search evaluation comes in. By using test data, queries, and their results, you can assess the performance of indexes, search parameters, and other related factors. This evaluation helps you understand how well your search system is working and identify areas for improvement.

### Example

To keep it super simple, let's use a

[countdown video](https://www.youtube.com/watch?v=tWoo8i_VkvI)

of 30 seconds.

We can imagine the information in the video indexed as documents that are "timestamps + some textual information" describing the visuals, as there is no audio in this video.

We can use the structure:

timestamp: (start, end), description: "string"

So, if we use the index_scenes function:

At (1, 2) - 29 seconds is displayed

At (2, 3) - 28 seconds is displayed

...

This continues until:

At (29, 30) - 1 second is displayed

### Ground Truth

Ground truth is the ideal expected result. To evaluate the performance of search, we need some test queries and the expected results.

Let's say for the query "Six" the expected result documents are at the following timestamps:

We will call this list of timestamps our ground truth for the query "Six."

### Evaluation Metrics

To evaluate the effectiveness of our search functionality, we can experiment with our query "Six" using various search parameters. 📊

The search results can be categorized as follows:

Retrieved Documents 🔍:

- Retrieved Relevant Documents: Match our ground truth ✅
- Retrieved Irrelevant Documents: Don't match our ground truth ❌

Non-Retrieved Documents 🚫:

- Non-Retrieved Relevant Documents: In our ground truth but not in results 😕
- Non-Retrieved Irrelevant Documents: Neither in ground truth nor results 👍

We can further classify these categories in terms of search accuracy:

- True Positives (TP) 🎯: Retrieved Relevant Documents. We wanted them, and we got them 🙌
- False Positives (FP) 🎭: Retrieved Irrelevant Documents. We didn't want them, but we got them 🤔
- False Negatives (FN) 😢: Non-Retrieved Relevant Documents. We wanted them, but we didn't get them 😓
- True Negatives (TN) 🚫: Non-Retrieved Irrelevant Documents. We didn't want them, and we didn't get them 👌

💡 This classification helps us assess the precision and recall of our search algorithm, enabling further optimization.
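To make this concrete, here is a minimal sketch in plain Python of how the four categories can be counted once each document is represented by its (start, end) timestamp. The ground-truth and retrieved sets below are illustrative, not from a real search run:

```python
# A sketch: classifying retrieved documents against ground truth.
# Timestamp values are illustrative for the 30-second countdown example.

all_docs = {(s, s + 1) for s in range(30)}   # every 1-second scene document
ground_truth = {(24, 25)}                    # scene(s) where "six" appears (assumed)
retrieved = {(24, 25), (4, 5), (13, 14)}     # documents a search returned (assumed)

tp = retrieved & ground_truth                # retrieved and relevant
fp = retrieved - ground_truth                # retrieved but irrelevant
fn = ground_truth - retrieved                # relevant but missed
tn = (all_docs - retrieved) - ground_truth   # correctly ignored

print(f"TP={len(tp)}, FP={len(fp)}, FN={len(fn)}, TN={len(tn)}")
```

Accuracy, precision, and recall in the next sections all derive from these four counts.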
### Accuracy

Accuracy measures how well our search algorithm retrieves the required documents while excluding irrelevant ones. It can be calculated as follows:

Accuracy = (TP + TN) / (TP + FP + FN + TN)

In other words, accuracy is the ratio of correctly classified documents (both retrieved relevant and non-retrieved irrelevant) to the total number of documents. 📊

To get a more comprehensive evaluation of search performance, it's crucial to consider other metrics such as precision, recall, and F1-score in addition to accuracy. 💡🔬

### Precision and Recall

Precision is the percentage of relevant retrieved documents out of all retrieved documents:

Precision = TP / (TP + FP)

It answers the question: "Of the documents our search returned, how many were actually relevant?"

Recall indicates the percentage of relevant documents that were successfully retrieved:

Recall = TP / (TP + FN)

It addresses the question: "Out of all the relevant documents, how many did our search find?" 🔍

### The Precision-Recall Trade-off

These metrics often have an inverse relationship, leading to a trade-off:

Recall 📈:

- Measures the model's ability to find all relevant cases in a dataset.
- Increases or remains constant as more documents are retrieved.
- Never decreases with an increase in retrieved documents.

Precision 📉:

- Refers to the proportion of correct positive identifications.
- Typically decreases as more documents are retrieved.
- Drops due to the increased likelihood of including false positives.

### Search in VideoDB

Let's understand the search interface provided by VideoDB and measure results with the above metrics.

The search function performs a search on video content with various customizable parameters:

- query: The search query string.
- search_type: Determines the search method.
  - SearchType.semantic (default): For question-answering queries, across thousands of videos or a collection. Check out [Semantic Search](https://docs.videodb.io/semantic-search-89) for a detailed understanding.
  - SearchType.keyword: Matches exact occurrences where the given query is present as a substring (single video only); keyword search at the single-video level returns all matching documents.
- index_type: Specifies the index to search:
  - IndexType.spoken_word (default): Searches spoken content.
  - IndexType.scene: Searches visual content.
- result_threshold: Initial filter for the top N matching documents (default: 5).
- score_threshold: Absolute threshold filter for relevance scores (default: 0.2).
- dynamic_score_percentage: Adaptive filtering mechanism, useful when there is a significant gap between top results and tail results after the score_threshold filter. It retains the top x% of the score range (default: 20%).
  - Calculation: dynamic_threshold = max_score - (range * dynamic_score_percentage)

This interface allows for flexible and precise searching of video content, with options to fine-tune result filtering based on relevance scores and dynamic thresholds.
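As a sketch of how these parameters fit together, here is one possible call for the countdown example. The parameter names are the ones documented above; the query, thresholds, and the exact scale of dynamic_score_percentage are illustrative assumptions:

```python
from videodb import SearchType, IndexType

# A sketch combining the documented search parameters; values are illustrative.
result = video.search(
    query="six",
    search_type=SearchType.semantic,   # or SearchType.keyword for exact matches
    index_type=IndexType.scene,        # the countdown video has no audio, so search scenes
    result_threshold=10,               # keep the top 10 matches before score filtering
    score_threshold=0.2,               # drop documents scoring below 0.2
    dynamic_score_percentage=20,       # "top 20%" per the default above (exact scale assumed)
)
result.play()
```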
### Experiment

Follow this notebook to explore experiments on fine-tuning search results and gain a deeper understanding of the methods involved:

[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/video-db/videodb-cookbook/blob/main/guides/VideoDB_Search_and_Evaluation.ipynb)

Here's a basic outcome of the default settings for both search types on the query "six" for the above video:

1. Semantic Search Default:

2. Keyword Search:

### Outcome

As you can see, keyword search is best suited for queries like "teen" and "six." However, if the queries are in natural language, such as "find me a 6", then semantic search is more appropriate.

Keyword search would struggle to find relevant results for such natural language queries.

### Search + LLM

For complex queries like "Find me all the numbers greater than six", a basic search will not work effectively, since it merely matches the query with documents in vector space and returns the matching documents.

In such cases, you can apply a loose filter to get all the documents that match the query. However, you will need to add an additional layer of intelligence using a Large Language Model (LLM). The matched documents can then be passed to the LLM to curate a response that accurately answers the query.
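A minimal sketch of that two-stage pipeline is below. It assumes each returned Shot exposes its matched text as shot.text and its timing as shot.start / shot.end (attribute names assumed), and ask_llm is a placeholder for whatever LLM client you use:

```python
# Sketch of a loose search followed by LLM reasoning; shot attribute names and
# the ask_llm helper are assumptions, not documented VideoDB API.
from videodb import IndexType

results = video.search(
    query="numbers",
    index_type=IndexType.scene,   # the countdown video has no audio
    result_threshold=30,          # loose filter: keep many candidate documents
    score_threshold=0.0,
)
shots = results.get_shots()

context = "\n".join(
    f"({shot.start}, {shot.end}): {shot.text}"   # attribute names assumed
    for shot in shots
)
answer = ask_llm(  # placeholder for your own LLM call (OpenAI, etc.)
    "From these scene documents, list the timestamps where the number shown "
    f"is greater than six:\n{context}"
)
print(answer)
```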
---

--------------------------------------------------------------------------------
/context/docs/fragments/language_support_79.txt:
--------------------------------------------------------------------------------

# Language Support [Source Link](https://docs.videodb.io/language-support-79)
VideoDB supports multiple languages for indexing the spoken content in videos. Just pass the language code to the indexing function index_spoken_words:

```python
hindi_video.index_spoken_words(language_code="hi")
```

### Auto-detected languages

English, Spanish, French, German, Italian, Portuguese, and Dutch are auto-detected, so you can skip passing the language code while indexing.
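For instance, a short sketch (the URLs are placeholders): auto-detected languages need no extra argument, while other languages take an explicit language_code from the table below:

```python
# Auto-detected language (e.g. English): no language_code needed.
video_en = conn.upload(url="https://example.com/english_talk.mp4")  # placeholder URL
video_en.index_spoken_words()

# Other supported languages: pass the code from the table below.
video_ja = conn.upload(url="https://example.com/japanese_talk.mp4")  # placeholder URL
video_ja.index_spoken_words(language_code="ja")
```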
### Supported Languages

Here are the supported languages and their language_code values:

```python
{
    "Global English": "en",
    "Australian English": "en_au",
    "British English": "en_uk",
    "American English": "en_us",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Dutch": "nl",
    "Hindi": "hi",
    "Japanese": "ja",
    "Chinese": "zh",
    "Finnish": "fi",
    "Korean": "ko",
    "Polish": "pl",
    "Russian": "ru",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Vietnamese": "vi",
}
```

---

--------------------------------------------------------------------------------
/context/docs/fragments/quick_start_guide_38.txt:
--------------------------------------------------------------------------------

# Quick Start Guide [Source Link](https://docs.videodb.io/quick-start-guide-38)

This notebook is designed to help you get started with VideoDB. Advanced concepts are linked in between for deeper dives.
[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/video-db/videodb-cookbook/blob/main/quickstart/VideoDB%20Quickstart.ipynb)

## Setup

### 🔧 Installing VideoDB in your environment

VideoDB is available as a

[python package 📦](https://pypi.org/project/videodb)

```python
!pip install videodb
```

### 🔗 Setting up a connection to the db

To connect to VideoDB, simply get the API key and create a connection. This can be done either by providing your VideoDB API key directly to the constructor or by setting the VIDEO_DB_API_KEY environment variable with your API key.

Get your API key from the

[VideoDB Console](https://console.videodb.io/)

. (Free for the first 50 uploads, no credit card required.) 🎉

```python
import videodb

conn = videodb.connect(api_key="YOUR_API_KEY")
```

## Working with a Single Video

### ⬆️ Uploading a video

Now that you have established a connection to VideoDB, you can upload your videos using conn.upload().

You can upload your media by URL or from your local file system. The upload method returns a Video object.

```python
# Upload a video by url
video = conn.upload(url="https://www.youtube.com/watch?v=WDv4AWk0J3U")

# Upload a video from the file system
video_f = conn.upload(file_path="./my_video.mp4")
```

VideoDB simplifies your upload by supporting links from YouTube, S3, or any public URL with video.

### Pro Tip

If you wish to upload only the audio from a video file, just specify it in the media_type field. For instance, you can obtain the audio from a YouTube video like so:

```python
from videodb import MediaType

audio = conn.upload(url="https://youtu.be/IoDVfXFq5cU?si=BCU7ghvqY3YdCS78", media_type=MediaType.audio)
```

The types of media that can be uploaded are defined in the MediaType class.

### 📺 Viewing your video

Your video is instantly available for viewing in 720p resolution. ⚡️

Generate a streamable URL for the video using video.generate_stream().

Preview the video using video.play(). This will open the video in your default browser/notebook.

```python
video.generate_stream()
video.play()
```

Note: if you are viewing this notebook on GitHub, you won't be able to see the iframe player because of security restrictions. Please open the printed player link in your browser.

### ⛓️ Stream specific sections of videos

You can easily clip specific sections of a video by passing a timeline of start and end sections. It accepts seconds. For example, here we stream only the first 10 seconds and then seconds 120 to 140 of a video:

```python
stream_link = video.generate_stream(timeline=[(0, 10), (120, 140)])
play_stream(stream_link)
```

### 🗂️ Indexing a Video

To search bits inside a video, you first have to index the video. This is done by invoking the index function on the video object. VideoDB currently offers two types of indexes.

index_spoken_words: Indexes spoken words in the video. It automatically generates the transcript and makes it ready for search.
Check out

[Language Support](https://docs.videodb.io/language-support-79)

to index content in different languages.

index_scenes: Indexes visual concepts and events of the video. Perfect for security monitoring, drone footage, and other camera feeds. Refer to

[Visual Search and Indexing](https://docs.videodb.io/visual-search-and-indexing-80)

.

Check out

[Multimodal Search](https://docs.videodb.io/multimodal-search-90)

for unlocking multimodal search in your video library.

⏱️ Indexing may take some time for longer videos, so structure it as a batch job with a callback in your application. Check

[Callback Details](https://docs.videodb.io/callback-details-66)

.

```python
# best for podcasts, elearning, news, etc.
video.index_spoken_words()

# best for camera feeds, moderation usecases etc.
video.index_scenes(prompt="")
```

Upcoming:

- Real-time feed indexing and real-time alerts.
- Domain-specific indexes such as football, baseball, drone footage, cricket, etc.

### 🔍 Search Inside a Video

Search the segments inside a video. While searching, you have options to choose the type of search and index. VideoDB offers the following types of search:

SearchType.semantic: Perfect for question-answer style queries. This is also the default type of search.

SearchType.keyword: Matches the exact occurrence of the word or sentence you pass in the query parameter of the search function. Keyword search is only available with single videos.

IndexType.scene: Searches the visual information of the video. Index the video using the index_scenes function.

IndexType.spoken_word: Searches the spoken information of the video. Index the video using the index_spoken_words function.

```python
from videodb import SearchType
from videodb import IndexType

result = video.search(
    query="What are the benefits of morning sunlight?",
    search_type=SearchType.semantic,
    index_type=IndexType.spoken_word,
)
result.play()
```

Viewing Search Results:

video.search() will return a SearchResults object, which contains the sections/shots of videos that semantically match your search query.

result.get_shots() - Returns a list of Shot objects that matched the search query.

result.play() - Returns a playable URL for the video (similar to video.play(); you can open this link in a browser or embed it into your website using an iframe).

## RAG: Search Inside Multiple Videos

VideoDB can store and search inside multiple videos with ease. By default, videos are uploaded to your default collection, and you are free to create and manage more collections; check out our

[Collections](https://docs.videodb.io/collections-68)

doc for more details.

If you are an existing LlamaIndex user trying to build a RAG pipeline on your video data, you can also use the VideoDB retriever.
Check out the LlamaIndex docs ⬇️

[LlamaIndex VideoDB Retriever](https://docs.llamaindex.ai/en/stable/examples/retrievers/videodb_retriever.html)

🔄 Using a Collection to upload multiple videos

```python
# Get the default collection
coll = conn.get_collection()

# Upload videos to a collection
coll.upload(url="https://www.youtube.com/watch?v=lsODSDmY4CY")
coll.upload(url="https://www.youtube.com/watch?v=vZ4kOr38JhY")
coll.upload(url="https://www.youtube.com/watch?v=uak_dXHh6s4")
```

conn.get_collection(): Returns a Collection object (the default collection).

coll.get_videos(): Returns a list of Video objects, all videos in the collection.

coll.get_video(video_id): Returns the Video object for the given video_id.

coll.delete_video(video_id): Deletes the video from the collection.

### 📂 Search inside multiple videos in a collection

You can simply index all the videos in a collection and use the search method on the collection to find relevant results. Here we index the spoken content of a collection and then search it.

```python
# Index all videos in the collection
for video in coll.get_videos():
    video.index_spoken_words()
```

Semantic search in the collection:

```python
# search in the collection of videos
results = coll.search(query="What is Dopamine?")
results.play()
```

Let's try one more search:

```python
results = coll.search(query="What's the benefit of morning sunlight?")
results.play()
```

The result here has all the matching bits in a single video stream from your collection. You can use these results in your application right away.

As you can see, VideoDB fundamentally removes the limitation of files and gives you the power to access and stream videos seamlessly. Stay tuned for exciting features in our upcoming version, and keep building awesome stuff with VideoDB 🤘

## 🌟 Explore more with the Video object

There are multiple methods available on a Video object that can be helpful for your use case.

### Access Transcript

```python
# get text of the spoken content
text_json = video.get_transcript()
text = video.get_transcript_text()
print(text)
```

### Add Subtitle to a video

It returns a new stream instantly with the subtitle added to the video. The subtitle function has many styling parameters such as font, size, and background color. Check

[Ref: Subtitle Styles](https://docs.videodb.io/ref-subtitle-styles-57)

and

[Guide: Subtitles](https://docs.videodb.io/guide-subtitles-73)

for details.

```python
new_stream = video.add_subtitle()
play_stream(new_stream)
```

### Get Thumbnail of Video

video.generate_thumbnail(): Returns a thumbnail image of the video.

### Delete a video

video.delete(): Deletes a video.
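Putting these helpers together, here is a short cleanup-style sketch; "MY_VIDEO_ID" is a placeholder, not a real ID:

```python
# Sketch: fetch a video from the default collection, inspect it, then remove it.
coll = conn.get_collection()
video = coll.get_video("MY_VIDEO_ID")  # placeholder ID

print(video.get_transcript_text())     # quick look at the spoken content
thumbnail = video.generate_thumbnail() # thumbnail image of the video
print(thumbnail)

video.delete()                         # remove the video once you are done
```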
👉🏼 Check out

[Dynamic Video Streams](https://docs.videodb.io/dynamic-video-streams-44)

to understand how you can modify video streams in real time. This opens doors for many use cases that were never possible with videos. ⚡️

👉🏼 Check out more examples and tutorials 👉

[Examples and Tutorials](https://docs.videodb.io/examples-and-tutorials-35)

to explore what you can build with VideoDB.

---

--------------------------------------------------------------------------------
/context/docs/fragments/ref_subtitle_styles_57.txt:
--------------------------------------------------------------------------------

# Ref: Subtitle Styles [Source Link](https://docs.videodb.io/ref-subtitle-styles-57)

The video.add_subtitle(SubtitleStyle()) function supports many parameters for styling your captions or subtitles according to your brand and guidelines. You can create your own:

- Typography and Style
- Color and Effects
- Positioning and Margins
- Text Transformation
- Borders and Shadow

This document provides an API reference for the parameters of the SubtitleStyle function.

Check out
Guide: Subtitles

to dive deep into the outputs of these parameters.

## Import

Import SubtitleStyle from the VideoDB module:

```python
from videodb import SubtitleStyle, connect

conn = connect()
coll = conn.get_collection()
video = coll.get_video("MY_VIDEO_ID")
video.add_subtitle(
    SubtitleStyle(
        font_name=<>,
        font_size=<>,
        spacing=<>,
        ....
        ....
    )
)
```

## SubtitleStyle

This function supports the following parameters for styling 👇

### font_name

The name of the font to use for the subtitles.

Default: "Arial"
Type: str

Check out the list of
Supported Fonts

### font_size

The size of the subtitle text in points.
Default: 18
Type: float

### primary_colour

The color of the main subtitle text in &HBBGGRR or &HAABBGGRR format. Check out
Color Format
for the details.

Default: "&H00FFFFFF" (white)
Type: str

### secondary_colour

The color used for secondary effects like karaoke.

Default: "&H000000FF" (red)
Type: str

Check out
Color Format
for the format details.

### outline_colour

The color of the text outline.

Default: "&H00000000" (black)
Type: str

Check out
Color Format
for the format details.

### back_colour

The background color of the subtitle box.

Default: "&H00000000" (black)
Type: str

Check out
Color Format
for the format details.

### bold

Indicates if the subtitle text is bold.

Default: False
Type: bool

### italic

Indicates if the subtitle text is italicized.

Default: False
Type: bool

### underline

Indicates if the subtitle text is underlined.

Default: False
Type: bool

### strike_out

Indicates if the subtitle text has a strikethrough.

Default: False
Type: bool

### scale_x

The horizontal scale of the subtitle text, as a fraction (1.0 = 100%).

Default: 1.0 (100%, no scaling)
Type: float

### scale_y

The vertical scale of the subtitle text, as a fraction (1.0 = 100%).

Default: 1.0 (100%, no scaling)
Type: float

### spacing

Space between characters in pixels.

Default: 0
Type: float

### angle

The rotation angle of the subtitle text in degrees.

Default: 0 (no rotation)
Type: float

### border_style

The style of the border around the text.

Default: SubtitleBorderStyle.outline
Type: int or SubtitleBorderStyle

This field accepts the following values:

- SubtitleBorderStyle.no_border or 1
- SubtitleBorderStyle.opaque_box or 3
- SubtitleBorderStyle.outline or 4

Usage:

```python
from videodb import SubtitleStyle, SubtitleBorderStyle, connect

conn = connect()
coll = conn.get_collection()
video = coll.get_video("MY_VIDEO_ID")
video.add_subtitle(
    SubtitleStyle(
        border_style=SubtitleBorderStyle.outline
    )
)
```

### outline

The width (px) of the outline around the text.

Default: 1.0 (px)
Type: float

### shadow

The depth of the shadow behind the text in pixels.

Default: 0.0
Type: float

### alignment

The position of the subtitle text on the screen, typically an enumerated type following the SSA/ASS standard.

Default: SubtitleAlignment.bottom_center
Type: SubtitleAlignment or int

This field accepts the following values:
- SubtitleAlignment.bottom_left or 1
- SubtitleAlignment.bottom_center or 2
- SubtitleAlignment.bottom_right or 3
- SubtitleAlignment.middle_left or 8, 9
- SubtitleAlignment.middle_center or 10

---

--------------------------------------------------------------------------------
/context/docs/fragments/scene_level_metadata_smarter_video_search_retrieval_107.txt:
--------------------------------------------------------------------------------

# Scene-Level Metadata: Smarter Video Search & Retrieval [Source Link](https://docs.videodb.io/scene-level-metadata-smarter-video-search-retrieval-107)
[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1OuqMdG44liBMQ0eG1xDDcZW-UK-Hr0ji?usp=sharing)

## Introduction

James and Mark are video engineers at leading sports entertainment companies, responsible for managing, storing, and editing vast amounts of sports footage. Their work requires them to extract and highlight the most exciting moments from hours of raw video.

Both rely on VideoDB to streamline their workflow, but their approaches differ.

Mark follows the traditional method: he indexes the entire video and runs a search for relevant scenes. VideoDB processes every indexed scene, analyzing descriptions either semantically or via keywords. The results are useful but not always efficient, especially when the relevant content spans just a few minutes in a multi-hour video. The search still scans everything, sometimes returning unrelated clips.

James, on the other hand, has a smarter strategy. Instead of searching the entire video, he first filters out irrelevant scenes, ensuring that only important moments are considered. This results in faster and more precise searches. How does he achieve this? By using Scene-Level Metadata.

## What is Scene-Level Metadata?

Scene-Level Metadata acts as smart tags for individual video scenes, allowing them to be filtered during search. Instead of relying solely on text descriptions, VideoDB enables metadata-based filtering to refine search results and make retrieval more efficient.

### Why is this necessary?

Every video consists of multiple scenes, each composed of frames. By default, VideoDB scans every scene to find relevant content.

This works well for short videos, but when handling longer videos, only a few scenes may actually be relevant. Searching across the entire video can lead to:

- Slower retrieval times
- Less accurate results

By tagging scenes with metadata, we can focus the search only on the relevant parts of the video, significantly improving accuracy and efficiency.

### How is Metadata Stored?

Metadata is stored as a dictionary in the Scene object, with a maximum of five key-value pairs per scene.

Here's an example:

```python
scene = Scene(
    video_id=video.id,
    start=60,
    end=62,
    description="A Red Bull car speeds down the straight at Monza.",
    metadata={"camera_view": "road_ahead", "action_type": "chasing"}
)
```

With Scene-Level Metadata, we can apply targeted filters, ensuring that searches return only highly relevant scenes.

## Example: Using Scene-Level Metadata in an F1 Race

James, our video engineer, works with Formula 1 race footage, which consists of continuous laps of high-speed action. To create engaging highlights, he needs to focus on the most thrilling moments:

- Chasing battles
- Sharp turns
- Overtaking maneuvers
- Dramatic crashes

Instead of searching the entire race, James applies Scene-Level Metadata to tag these key moments, ensuring faster and more accurate retrieval.
### Defining Metadata Filters

James decides to apply metadata filters using the "action_type" key, assigning one of the following values:

📌 ["chase", "turn", "overtake", "crash"]

For simplicity, he uses only one key-value pair per scene, but he could add multiple filters (e.g., "camera_view", "lap_number") for even more precise results.

## James' Workflow with VideoDB

### Step 1: Extract Scenes from the Footage

To improve indexing, James splits the video into 2-second scenes and extracts a single key frame per scene.

```python
scene_collection = video.extract_scenes(
    extraction_type=SceneExtractionType.time_based,
    extraction_config={"time": 2, "select_frames": ["middle"]}
)
scenes = scene_collection.scenes  # Fetch extracted scenes
```

### Step 2: Assign Metadata to Each Scene

James uses AI-powered descriptions to automatically tag scenes with the correct action type before indexing.

```python
described_scenes = []
for scene in scenes:
    # use describe to create smart metadata, category, filter etc.
    action_type = scene.describe('Select one: ["chase", "turn", "overtake", "crash"]')

    # use prompt to index contextual information that you need to search in vectors.
    # use metadata to add structured information to each scene.
    described_scene = Scene(
        video_id=video.id,
        start=scene.start,
        end=scene.end,
        description=scene.describe("Describe this scene briefly."),
        metadata={"action_type": action_type}
    )
    described_scenes.append(described_scene)
```

### Step 3: Index the Video with Scene Metadata

Once metadata is assigned, James indexes the scenes for efficient searching.

```python
scene_index_id = video.index_scenes(
    scenes=described_scenes,
    name="F1 Highlight Scenes"
)
```

### Step 4: Searching with Metadata Filters

Now, instead of searching the entire video, James can filter his search to focus only on specific race moments.

## Applying Metadata Filters in Search

### Example 1: Finding Intense Overtakes

To find all overtaking moments, James applies a metadata filter:

```python
search_results = video.search(
    query="A thrilling overtaking maneuver",
    filter=[{"action_type": "overtake"}],  # Apply metadata filter
    search_type=SearchType.semantic,
    index_type=IndexType.scene,
    scene_index_id=scene_index_id
)
search_results.play()  # View the retrieved scenes
```

### Example 2: Finding Chase Scenes in the Race

To retrieve close pursuit moments, James filters for chase scenes:

```python
search_results = video.search(
    query="An aggressive chase on the track",
    filter=[{"action_type": "chase"}],  # Apply metadata filter
    search_type=SearchType.semantic,
    index_type=IndexType.scene,
    scene_index_id=scene_index_id
)
search_results.play()
```

By applying Scene-Level Metadata, James dramatically improves his video search workflow.

## Index-level Metadata

Metadata can be passed as a parameter to the index_scenes function as well.
```python
scene_index_id = video.index_scenes(
    extraction_type=SceneExtractionType.time_based,
    extraction_config={"time": 540},
    metadata={"category": "news", "topic": "airplane"},
)
```

The metadata you pass during the indexing process applies to all the scenes you are indexing.

Depending on your application, you may have additional scene-related metadata, which can be included within the metadata parameter. Please refer to the metadata guidelines.

Metadata Guidelines:

- metadata must be a dictionary containing key-value pairs.
- Both keys and values can be of type int or string.
- A maximum of 5 key-value pairs is allowed.
- The length of keys and values must not exceed 20 characters.

Filter results based on your criteria; you can pass more than one filter. This is useful for timestamp-based filtering of results while exploring archival content, and for many similar categorical approaches to finding the right content.

```python
results = video.search(
    query="airport",
    filter=[{"category": "news"}],
    index_type=IndexType.scene
)

results = coll.search(
    query="airport",
    filter=[{"category": "news"}],
    index_type=IndexType.scene
)
```

Filter Guidelines:

- The filter must be a list of dictionaries.
- Each dictionary specifies a key-value pair to filter the results based on metadata.

## Expanding the Use Cases

Metadata isn't just for sports highlights; it has applications across multiple industries:

🔹 Wildlife Documentation
A raw wildlife documentary may contain hours of footage capturing slow-moving landscapes and sudden bursts of animal activity. But let's say we're only interested in tracking a pride of lions. With metadata tagging, we can filter out only the scenes featuring lions, making it easier to find the right content.

🔹 Tech Conferences & Keynote Events
A multi-hour tech conference covers various topics: Blockchain, GenAI, Quantum Computing, etc. Instead of searching through entire sessions, we can tag segments based on their subjects and filter out irrelevant sections, making topic-based retrieval seamless.

🔹 Security & Surveillance
In CCTV surveillance, hours of footage may contain only a few moments of interest, such as unauthorized access or suspicious activity. By tagging scenes based on motion detection, time of day, or facial recognition, security teams can instantly retrieve critical footage.

## The Future of Smart Video Retrieval

Scene-Level Metadata is a game-changer in video indexing and retrieval. It enhances:

✅ Precision – Finds exactly what you're looking for.
✅ Efficiency – Speeds up the search process.
✅ Scalability – Works with large video datasets effortlessly.

From Formula 1 highlights to security footage analysis, metadata-driven search makes video retrieval faster, smarter, and more intuitive than ever before.

With VideoDB, every second of footage becomes instantly accessible.
263 | 
264 | ## Expanding the Use Cases
265 | 
266 | Metadata isn't just for sports highlights—it has applications across multiple industries:
267 | 
268 | 🔹 Wildlife Documentation
269 | A raw wildlife documentary may contain hours of footage capturing slow-moving landscapes and sudden bursts of animal activity. But let’s say we’re only interested in tracking a pride of lions. With metadata tagging, we can filter out only the scenes featuring lions, making it easier to find the right content.
270 | 
271 | 🔹 Tech Conferences & Keynote Events
272 | A multi-hour tech conference covers various topics—Blockchain, GenAI, Quantum Computing, etc. Instead of searching through entire sessions, we can tag segments based on their subjects and filter out irrelevant sections, making topic-based retrieval seamless.
273 | 
274 | 🔹 Security & Surveillance
275 | In CCTV surveillance, hours of footage may contain only a few moments of interest, such as unauthorized access or suspicious activity. By tagging scenes based on motion detection, time of day, or facial recognition, security teams can instantly retrieve critical footage.
276 | 
277 | ## The Future of Smart Video Retrieval
278 | 
279 | Scene-Level Metadata is a game-changer in video indexing and retrieval. It enhances:
280 | 
281 | ✅ Precision – Finds exactly what you’re looking for.
282 | ✅ Efficiency – Speeds up the search process.
283 | ✅ Scalability – Works with large video datasets effortlessly.
284 | 
285 | From Formula 1 highlights to security footage analysis, metadata-driven search makes video retrieval faster, smarter, and more intuitive than ever before.
286 | 
287 | With VideoDB, every second of footage becomes instantly accessible.
288 | 
289 | ---
290 | 
--------------------------------------------------------------------------------
/context/docs/process_docs.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import fnmatch
 3 | import json
 4 | 
 5 | import yaml
 6 | 
 7 | from firecrawl import FirecrawlApp
 8 | import context.utils as utils  # Your custom LLM utilities
 9 | 
10 | 
11 | def simplify_content_with_llm(prompt_text, text, llm):
12 |     """
13 |     Simplify Markdown content using an LLM.
14 | 
15 |     Parameters:
16 |         prompt_text (str): The prompt to guide the simplification.
17 |         text (str): The original Markdown content.
18 |         llm (str): The language model identifier to use.
19 | 
20 |     Returns:
21 |         str: The simplified Markdown content.
22 |     """
23 |     if not prompt_text:
24 |         prompt_text = """
25 |         Simplify the following Markdown content.
26 |         Remove fluff and keep only key technical details.
27 |         Remove any extraneous buttons or sections.
28 |         """
29 |     llm_output = utils.get_llm_output(prompt_text, text, llm=llm)
30 |     return llm_output["response"], llm_output["usage"]
31 | 
32 | 
33 | def load_config_yaml():
34 |     with open("config.yaml", "r", encoding="utf-8") as f:
35 |         return yaml.safe_load(f)
36 | 
37 | 
38 | class DocsHandler:
39 |     """
40 |     Handles the processing of documentation files.
41 |     """
42 | 
43 |     def __init__(
44 |         self,
45 |         include_patterns,
46 |         exclude_patterns,
47 |         prompt_config,
48 |         docs_output_fragments,
49 |         base_url,
50 |         doc_tree_file,
51 |         llm,
52 |     ):
53 |         self.include_patterns = include_patterns
54 |         self.exclude_patterns = exclude_patterns
55 |         self.prompt_config = prompt_config
56 |         self.base_url = base_url
57 |         self.doc_tree_file = doc_tree_file
58 |         self.docs_output_fragments = docs_output_fragments
59 |         self.llm = llm
60 | 
61 |     def traverse_doc_tree(self, doc_tree, parent_path=""):
62 |         """
63 |         Traverse a documentation tree structure and return a list of tuples (full_path, href, element).
64 |         """
65 |         results = []
66 |         for node in doc_tree:
67 |             current = (
68 |                 os.path.join(parent_path, node["element"])
69 |                 if parent_path
70 |                 else node["element"]
71 |             )
72 |             results.append((current, node.get("href", ""), node["element"]))
73 |             if node.get("children"):
74 |                 results.extend(self.traverse_doc_tree(node["children"], current))
75 |         return results
76 | 
77 |     def scrape_document_to_md(self, url):
78 |         """
79 |         Scrape a document URL and return its Markdown content.
 80 |         """
 81 |         api_key = os.getenv("FIRECRAWL_API_KEY")
 82 |         if not api_key:
 83 |             raise Exception("Firecrawl API key missing")
 84 |         app = FirecrawlApp(api_key=api_key)
 85 |         response = app.scrape_url(url=url, params={"formats": ["markdown"]}).get(
 86 |             "markdown", ""
 87 |         )
 88 |         return response
 89 | 
 90 |     def get_prompt_for_identifier(self, identifier):
 91 |         """
 92 |         Determine which prompt to use based on the document identifier and prompt configuration.
 93 |         """
 94 |         prompt_folder = self.prompt_config.get("prompt_folder", "")
 95 |         default_prompt = self.prompt_config.get("default_prompt", "")
 96 |         custom_prompts = self.prompt_config.get("custom_prompts", [])
 97 |         selected_prompt = default_prompt
 98 | 
 99 |         for entry in custom_prompts:
100 |             pattern = entry.get("pattern")
101 |             prompt_file = entry.get("prompt")
102 |             if pattern and prompt_file and fnmatch.fnmatch(identifier, pattern):
103 |                 selected_prompt = prompt_file
104 | 
105 |         full_prompt_path = os.path.join(prompt_folder, selected_prompt)
106 |         if os.path.exists(full_prompt_path):
107 |             with open(full_prompt_path, "r", encoding="utf-8") as f:
108 |                 return f.read()
109 |         else:
110 |             print(
111 |                 f"Warning: Prompt file {full_prompt_path} not found. Using empty prompt."
112 |             )
113 |             return ""
114 | 
115 |     def process(self):
116 |         """
117 |         Process the documentation tree:
118 |         - Traverse the doc tree to identify docs to include.
119 |         - Scrape each doc URL.
120 |         - Simplify its Markdown content via the LLM.
121 |         - Append the result to a combined Markdown string.
122 | 
123 |         Returns:
124 |             str: The combined Markdown content.
125 |         """
126 |         with open(self.doc_tree_file, "r", encoding="utf-8") as f:
127 |             doc_tree = json.load(f)
128 | 
129 |         docs = self.traverse_doc_tree(doc_tree)
130 |         selected = []
131 |         for full_path, href, element in docs:
132 |             include = True
133 |             if self.include_patterns:
134 |                 include = any(
135 |                     fnmatch.fnmatch(full_path, pat) for pat in self.include_patterns
136 |                 )
137 |             exclude = False
138 |             if self.exclude_patterns:
139 |                 exclude = any(
140 |                     fnmatch.fnmatch(full_path, pat) for pat in self.exclude_patterns
141 |                 )
142 |             if include and not exclude:
143 |                 selected.append((full_path, href, element))
144 | 
145 |         output = ""
146 |         total_tokens_used = 0
147 |         for full_path, href, element in selected:
148 |             # Build full URL based on the href value.
149 |             url = self.base_url.rstrip("/") + href if href.startswith("/") else href
150 |             try:
151 |                 content = self.scrape_document_to_md(url)
152 |             except Exception as e:
153 |                 content = f"Error fetching {url}: {str(e)}"
154 |             prompt_text = self.get_prompt_for_identifier(full_path)
155 |             simplified, tokens_used = simplify_content_with_llm(
156 |                 prompt_text, content, self.llm
157 |             )
158 |             print(f"💰 Tokens Used {tokens_used}")
159 |             total_tokens_used += tokens_used
160 |             doc_output = (
161 |                 f"# {element} [Source Link]({self.base_url}{href})\n\n"
162 |                 + simplified
163 |                 + "\n\n---\n\n"
164 |             )
165 |             if self.docs_output_fragments:
166 |                 os.makedirs(self.docs_output_fragments, exist_ok=True)
167 |                 file_name = f"{(href.replace('-', '_').strip('/')) or 'index'}.txt"
168 |                 print("this is file_name", file_name)
169 |                 doc_output_file_path = os.path.join(
170 |                     self.docs_output_fragments,
171 |                     file_name,
172 |                 )
173 |                 with open(doc_output_file_path, "w") as f:
174 |                     f.write(doc_output)
175 |             output += doc_output
176 |         print(f"💰 💰 Total Tokens Used : {total_tokens_used}")
177 |         return output
178 | 
179 | 
180 | if __name__ == "__main__":
181 |     # Load configuration from YAML
182 |     config = load_config_yaml().get("docs_context", {})
183 |     clone_dir = config.get("clone_dir")
184 | 
185 |     # Retrieve the LLM parameter from the config; default to "gemini" if not provided.
186 |     llm = config.get("llm", "gemini")
187 | 
188 |     # Docs configuration
189 |     docs_include = config.get("include", [])
190 |     docs_exclude = config.get("exclude", [])
191 |     docs_prompts = config.get("prompts", {})
192 |     docs_output_fragments = config.get("output_fragments")
193 |     docs_output_file = config.get("output_file", "")
194 |     docs_base_url = config.get("base_url", "")
195 |     docs_tree_file = config.get("tree_file", "")
196 | 
197 |     # Process Docs
198 |     docs_handler = DocsHandler(
199 |         docs_include,
200 |         docs_exclude,
201 |         docs_prompts,
202 |         docs_output_fragments,
203 |         docs_base_url,
204 |         docs_tree_file,
205 |         llm,
206 |     )
207 |     docs_content = docs_handler.process()
208 | 
209 |     # Save Docs content to the desired output file
210 |     if docs_output_file:
211 |         os.makedirs(os.path.dirname(docs_output_file), exist_ok=True)
212 |         with open(docs_output_file, "w", encoding="utf-8") as f:
213 |             f.write(docs_content)
214 |         print(f"✔ Docs content saved in {docs_output_file}")
215 | 
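The `__main__` block above drives `DocsHandler` entirely from the `docs_context` section of `config.yaml`. As an editorial illustration (the values below are inferred from the repository layout and the keys read above, not an excerpt from the actual config), the handler can also be driven directly:

```python
from context.docs.process_docs import DocsHandler

# Illustrative values only; adjust to your own doc tree and prompt files.
handler = DocsHandler(
    include_patterns=["*"],            # fnmatch patterns over doc-tree paths
    exclude_patterns=[],
    prompt_config={
        "prompt_folder": "context/prompts",
        "default_prompt": "default_docs.txt",
        "custom_prompts": [{"pattern": "*Quick Start*", "prompt": "custom_1.txt"}],
    },
    docs_output_fragments="context/docs/fragments",
    base_url="https://docs.videodb.io",
    doc_tree_file="context/docs/doc_tree.json",
    llm="gemini",
)
combined_md = handler.process()  # requires FIRECRAWL_API_KEY in the environment
```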
--------------------------------------------------------------------------------
/context/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/video-db/agent-toolkit/030bba81cbe63fdd1728796c50ffe9d750daad31/context/examples/__init__.py
--------------------------------------------------------------------------------
/context/examples/fragments/Cleanup.txt:
--------------------------------------------------------------------------------
  1 | # IPYNB Notebook: Cleanup [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/guides/Cleanup.ipynb)
  2 | 
  3 | ```markdown
  4 | ## Guide: Cleaning Up Your VideoDB Account
  5 | 
  6 | Open In Colab
  7 | 
  8 | ⚠️ **WARNING: This notebook will permanently delete media files from your VideoDB account. Data loss is irreversible.** ⚠️
  9 | 
 10 | 🚨 **IMPORTANT: Before proceeding, carefully review the media files you intend to delete. This action cannot be undone.** 🚨
 11 | 
 12 | This guide explains how to remove media files and reclaim storage space within your VideoDB account. It covers:
 13 | 
 14 | * Deleting videos
 15 | * Deleting audio files
 16 | * Deleting images
 17 | 
 18 | ## 🛠️ Setup
 19 | 
 20 | ---
 21 | 
 22 | Before you begin, ensure you have your [VideoDB](https://videodb.io) API key available.
 23 | 
 24 | ```python
 25 | %pip install videodb
 26 | ```
 27 | 
 28 | ```python
 29 | import os
 30 | from videodb import connect
 31 | 
 32 | os.environ["VIDEO_DB_API_KEY"] = "YOUR_KEY_HERE"  # Replace with your actual API key
 33 | 
 34 | conn = connect()
 35 | ```
 36 | 
 37 | ## Review Collections
 38 | 
 39 | ---
 40 | 
 41 | This section displays information about your collections and the number of media assets within each.
 42 | 
 43 | ```python
 44 | colls = conn.get_collections()
 45 | 
 46 | print(f"Found {len(colls)} collections:\n")
 47 | 
 48 | for coll in colls:
 49 |     videos = coll.get_videos()
 50 |     audios = coll.get_audios()
 51 |     images = coll.get_images()
 52 | 
 53 |     print(f"Collection Name: '{coll.name}' (ID: {coll.id})")
 54 |     print(f"  - Videos : {len(videos)}")
 55 |     print(f"  - Audio  : {len(audios)}")
 56 |     print(f"  - Images : {len(images)}\n")
 57 | ```
 58 | 
 59 | ## Select the Target Collection
 60 | 
 61 | ---
 62 | 
 63 | Specify the ID of the collection you wish to clean up.
 64 | 
 65 | ```python
 66 | collection_id = "YOUR_COLLECTION_ID_HERE"  # Replace with the ID of the collection you want to clean.
 67 | ```
 68 | 
 69 | ### ⚠️ Delete All Videos
 70 | 
 71 | ---
 72 | 
 73 | **Irreversibly deletes all videos from the selected collection. Use with extreme caution!**
 74 | 
 75 | ```python
 76 | coll = conn.get_collection(collection_id)
 77 | videos = coll.get_videos()
 78 | 
 79 | for video in videos:
 80 |     video.delete()
 81 |     print(f"Deleted video: {video.name} (ID: {video.id})")
 82 | print("Video deletion complete.")
 83 | ```
 84 | 
 85 | ### ⚠️ Delete All Audio
 86 | 
 87 | ---
 88 | 
 89 | **Irreversibly deletes all audio files from the selected collection. Use with extreme caution!**
 90 | 
 91 | ```python
 92 | coll = conn.get_collection(collection_id)
 93 | audios = coll.get_audios()
 94 | 
 95 | for audio in audios:
 96 |     audio.delete()
 97 |     print(f"Deleted audio: {audio.name} (ID: {audio.id})")
 98 | print("Audio deletion complete.")
 99 | ```
100 | 
101 | ### ⚠️ Delete All Images
102 | 
103 | ---
104 | 
105 | **Irreversibly deletes all images from the selected collection.
Use with extreme caution!** 106 | 107 | ```python 108 | coll = conn.get_collection(collection_id) 109 | images = coll.get_images() 110 | 111 | for image in images: 112 | image.delete() 113 | print(f"Deleted image: {image.name} (ID: {image.id})") 114 | print("Image deletion complete.") 115 | ``` 116 | 117 | 118 | --- 119 | 120 | -------------------------------------------------------------------------------- /context/examples/fragments/Multimodal_Quickstart.txt: -------------------------------------------------------------------------------- 1 | # IPYNB Notebook: Multimodal_Quickstart [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/quickstart/Multimodal_Quickstart.ipynb) 2 | 3 | This was processed through custom_2.txt 4 | 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /context/examples/fragments/Scene_Index_QuickStart.txt: -------------------------------------------------------------------------------- 1 | # IPYNB Notebook: Scene Index QuickStart [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/quickstart/Scene%20Index%20QuickStart.ipynb) 2 | 3 | ```markdown 4 | # ⚡️ Quick Start: Scene Indexing with VideoDB 5 | 6 | Open In Colab 7 | 8 | This guide provides a quick introduction to scene indexing with VideoDB, enabling powerful visual search and content understanding in your videos. Leverage vision models to extract meaningful information from videos and easily index it using VideoDB. 9 | 10 | Use scene indexing to build RAG applications and answer complex queries: 11 | 12 | ![](https://raw.githubusercontent.com/video-db/videodb-cookbook/main/images/scene_index/intro.png) 13 | 14 | ## Setup 15 | 16 | --- 17 | 18 | ### 📦 Install VideoDB 19 | 20 | Install the VideoDB package using pip: 21 | 22 | ```python 23 | !pip install -U videodb 24 | ``` 25 | 26 | ### 🔑 Configure API Key 27 | 28 | Import the `os` module and set your VideoDB API key as an environment variable. Replace `"sk-xxxx-yyyyy-zzzz"` with your actual API key. 29 | 30 | ```python 31 | import os 32 | 33 | os.environ["VIDEO_DB_API_KEY"] = "sk-xxxx-yyyyy-zzzz" 34 | ``` 35 | 36 | ### 🌐 Connect to VideoDB 37 | 38 | Establish a connection to VideoDB and get a collection instance: 39 | 40 | ```python 41 | from videodb import connect 42 | 43 | conn = connect() 44 | coll = conn.get_collection() 45 | ``` 46 | 47 | ### 🎥 Upload Video 48 | 49 | Upload a video to VideoDB. This example uses a YouTube video URL. 50 | 51 | ```python 52 | video = coll.upload(url="https://www.youtube.com/watch?v=LejnTJL173Y") 53 | ``` 54 | 55 | ## 📇 Index Scenes 56 | 57 | --- 58 | 59 | The `index_scenes` function automatically indexes visual information in your video, extracting meaningful scenes. 60 | 61 | ```python 62 | index_id = video.index_scenes() 63 | ``` 64 | 65 | ### Optional Parameters 66 | 67 | Customize scene indexing using optional parameters: 68 | 69 | * **`extraction_type`**: Choose a scene extraction algorithm (e.g., time-based). 70 | * **`extraction_config`**: Configure the selected extraction algorithm (e.g., time interval for time-based extraction). 71 | * **`prompt`**: Provide a prompt for a vision model to describe the scenes and frames (e.g., "describe the image in 100 words"). 72 | * **`callback_url`**: Specify a URL to receive a notification when the indexing job is complete. 73 | 74 | Refer to the [Scene and Frame Object Guide](https://github.com/video-db/videodb-cookbook/blob/main/guides/video/scene-index/advanced_visual_search.ipynb) for more details. 
 75 | 
 76 | ```python
 77 | from videodb import SceneExtractionType, IndexType
 78 | 
 79 | index_id = video.index_scenes(
 80 |     extraction_type=SceneExtractionType.time_based,
 81 |     extraction_config={"time":10, "select_frames": ['first']},
 82 |     prompt="describe the image in 100 words",
 83 |     # callback_url=callback_url,
 84 | )
 85 | 
 86 | # Wait for indexing to finish
 87 | scene_index = video.get_scene_index(index_id)
 88 | scene_index
 89 | ```
 90 | 
 91 | Example output:
 92 | 
 93 | ```
 94 | [{'description': 'The image depicts a man sitting in an office or conference room...',
 95 |   'end': 10.01,
 96 |   'start': 0.0},
 97 |  {'description': 'The image shows a man with a receding hairline, wearing a dark suit...',
 98 |   'end': 20.02,
 99 |   'start': 10.01},
100 | ...
101 | ]
102 | ```
103 | 
104 | > Note: It may take a few seconds for the index to become available for searching.
105 | 
106 | ```python
107 | # Search your video using the index_id.
108 | 
109 | res = video.search(query="religious gathering",
110 |                    index_type=IndexType.scene,
111 |                    index_id=index_id)
112 | 
113 | res.play()
114 | ```
115 | 
116 | This will output a URL that opens a VideoDB player, showcasing the relevant scenes.
117 | 
118 | ## ⚙️ Understanding `index_scenes` Parameters
119 | 
120 | ---
121 | 
122 | Let's explore the parameters of the `index_scenes` function in more detail:
123 | 
124 | * `extraction_type`: Chooses the algorithm for scene extraction.
125 | * `extraction_config`: Provides configuration details for the chosen algorithm.
126 | * `prompt`: Instructs the vision model on how to describe each scene.
127 | * `callback_url`: Specifies a URL to be notified when the indexing job finishes.
128 | 
129 | ### ⚙️ `extraction_type` & `extraction_config`
130 | 
131 | Videos are essentially sequences of images (frames). The `extraction_type` parameter allows you to select different scene extraction algorithms, which, in turn, influence the selection of relevant frames for description. For more information, see [Scene Extraction Algorithms](https://docs.videodb.io/scene-extraction-algorithms-84).
132 | 
133 | ![](https://raw.githubusercontent.com/video-db/videodb-cookbook/main/images/scene_index/VSF.png)
134 | 
135 | ### ⚙️ `prompt`
136 | 
137 | The `prompt` is crucial for guiding the vision models. It defines the context and desired output format.
138 | 
139 | For example, to identify running activity, you might use the following prompt:
140 | 
141 | > "Describe clearly what is happening in the video. Add 'running_detected' if you see a person running."
142 | 
143 | For experimenting with custom models and prompts, see [Advanced Visual Search Pipelines](https://github.com/video-db/videodb-cookbook/blob/main/guides/scene-index/advanced_visual_search.ipynb).
144 | 
145 | ### ⚙️ `callback_url`
146 | 
147 | The `callback_url` receives a notification upon completion of the scene indexing process. Refer to [Callback Details](https://docs.videodb.io/callback-details-66#_lubHL) for more information.
148 | 
149 | 
150 | 
151 | 
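The guide leaves the receiving side of `callback_url` to the reader. As a loose sketch (not from the original docs), assuming VideoDB delivers the completion notification as an HTTP POST with a JSON body (the exact payload fields are specified in the Callback Details page linked above), a minimal receiver could look like:

```python
from http.server import BaseHTTPRequestHandler, HTTPServer
import json

class CallbackHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        # Read and parse the JSON payload that VideoDB POSTs on completion.
        length = int(self.headers.get("Content-Length", 0))
        payload = json.loads(self.rfile.read(length) or b"{}")
        print("Scene indexing finished:", payload)
        self.send_response(200)
        self.end_headers()

# Expose this on a public URL and pass that URL as callback_url to index_scenes().
HTTPServer(("0.0.0.0", 8080), CallbackHandler).serve_forever()
```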
152 | 153 | ## 🗂️ Managing Indexes 154 | 155 | --- 156 | 157 | > 💡 You can create multiple scene indexes for a single video and rank search results based on these indexes. 158 | 159 | **List Scene Indexes:** 160 | 161 | `video.list_scene_index()` returns a list of available scene indexes, including their `id`, `name`, and `status`. 162 | 163 | ```python 164 | scene_indexes = video.list_scene_index() 165 | print(scene_indexes) 166 | ``` 167 | 168 | Example output: 169 | 170 | ``` 171 | [{'name': 'Scene Index 2024-07-22 10:06', 'scene_index_id': 'f4db35c5ce45a709', 'status': 'done'}] 172 | ``` 173 | 174 | **Get a Specific Index:** 175 | 176 | `video.get_scene_index(index_id)` returns a list of indexed scenes, including `scene_index_id`, `start`, `end`, and `description`. 177 | 178 | ```python 179 | scene_index = video.get_scene_index(index_id) 180 | print(scene_index) 181 | ``` 182 | 183 | Example output: 184 | 185 | ``` 186 | [{'description': 'The image depicts a man sitting in an office...', 'end': 10.01, 'start': 0.0}, 187 | {'description': 'The image shows a man with a receding hairline...', 'end': 20.02, 'start': 10.01}, 188 | ...] 189 | ``` 190 | 191 | **Delete an Index:** 192 | 193 | ```python 194 | video.delete_scene_index(index_id) 195 | ``` 196 | 197 | ## 🧑‍💻 Deep Dive 198 | 199 | --- 200 | 201 | Explore the following resources and tutorials for more advanced scene indexing techniques: 202 | 203 | * **Custom Annotations Pipeline:** [Custom Annotations](https://github.com/video-db/videodb-cookbook/blob/main/guides/scene-index/custom_annotations.ipynb) - Bring your own scene descriptions and annotations. 204 | * **Playground for Scene Extractions:** [Playground](https://github.com/video-db/videodb-cookbook/blob/main/guides/scene-index/playground_scene_extraction.ipynb) - Experiment with different extraction algorithms and prompts. 205 | * **Advanced Visual Search Pipelines:** [Advanced Visual Search](https://github.com/video-db/videodb-cookbook/blob/main/guides/scene-index/advanced_visual_search.ipynb) - Build flexible and powerful visual search workflows. 206 | 207 | If you have any questions or feedback, please reach out to us! 208 | 209 | * [Discord](https://discord.gg/py9P639jGz) 210 | * [GitHub](https://github.com/video-db) 211 | * [Website](https://videodb.io) 212 | * [Email](ashu@videodb.io) 213 | ``` 214 | 215 | --- 216 | 217 | -------------------------------------------------------------------------------- /context/examples/fragments/Subtitle.txt: -------------------------------------------------------------------------------- 1 | # IPYNB Notebook: Subtitle [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/guides/Subtitle.ipynb) 2 | 3 | ```markdown 4 | ## Guide: Subtitles 5 | 6 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/video-db/videodb-cookbook/blob/nb/main/guides/video/Subtitle.ipynb) 7 | 8 | ## Adding Subtitles to Your Videos 9 | --- 10 | 11 | This guide demonstrates how to customize subtitle styles using the `SubtitleStyle` class in VideoDB. 
We'll explore various configuration options and their visual outputs, covering: 12 | 13 | * Typography and Style 14 | * Color and Effects 15 | * Positioning and Margins 16 | * Text Transformation 17 | * Borders and Shadow 18 | 19 | ## 🛠️ Setup 20 | --- 21 | 22 | ### 📦 Installing the VideoDB Package 23 | 24 | ```python 25 | %pip install videodb 26 | ``` 27 | 28 | ### 🔑 API Key Configuration 29 | 30 | Before you begin, you'll need a VideoDB API key. 31 | 32 | > Get your free API key (for the first 50 uploads, no credit card required!) from the [VideoDB Console](https://console.videodb.io). 🎉 33 | 34 | Set the API key as an environment variable: 35 | 36 | ```python 37 | import os 38 | os.environ["VIDEO_DB_API_KEY"] = "" # Replace with your actual API key 39 | ``` 40 | 41 | ### 🌐 Connecting to VideoDB 42 | 43 | Establish a connection to VideoDB and access a collection: 44 | 45 | ```python 46 | from videodb import connect 47 | 48 | conn = connect() 49 | coll = conn.get_collection() 50 | ``` 51 | 52 | ### 🎥 Uploading a Video 53 | 54 | Upload a base video to add subtitles. We'll use a sample video for this guide: 55 | 56 | ```python 57 | video = coll.upload(url="https://www.youtube.com/watch?v=il39Ks4mV9g") 58 | video.play() 59 | ``` 60 | 61 | Output should be a playable video within the notebook, directing to the VideoDB console player. Example: 62 | 63 | ``` 64 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/ef6ef08c-b276-4e1d-b1d0-f0525e697d46.m3u8' 65 | ``` 66 | 67 | > ℹ️ You can also upload videos from your local file system by providing the `file_path` to the `upload()` method. 68 | 69 | ## 🔊 Indexing Spoken Words 70 | 71 | --- 72 | 73 | To generate subtitles, first index the video's spoken words using `video.index_spoken_words()`: 74 | 75 | ```python 76 | video.index_spoken_words() 77 | ``` 78 | 79 | A progress bar indicates the indexing process. 80 | 81 | ``` 82 | 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:32<00:00, 3.04it/s] 83 | ``` 84 | 85 | ## 📝 Adding Default Subtitles 86 | 87 | --- 88 | 89 | Add default subtitles to your video using `Video.add_subtitle()`. This method returns a streaming link: 90 | 91 | ```python 92 | from videodb import play_stream 93 | 94 | # Add subtitles to the video 95 | stream_url = video.add_subtitle() 96 | 97 | # Play the video with subtitles 98 | play_stream(stream_url) 99 | ``` 100 | 101 | Output should be a playable video within the notebook, directing to the VideoDB console player with subtitles. Example: 102 | 103 | ``` 104 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/76e0206d-b3af-4a74-9628-54636bf22ddf.m3u8' 105 | ``` 106 | 107 | ## 📝 Customizing Subtitle Styles 108 | 109 | --- 110 | 111 | To customize the subtitle style, pass a `SubtitleStyle()` object, configured with your desired styles, to the `Video.add_subtitle()` method. 112 | 113 | > ℹ️ Refer to the [SubtitleStyle API Reference](link_to_api_reference - *replace with actual link*) for a complete list of available options. 114 | 115 | ### 1. Typography and Style 116 | 117 | Configure the typography of the subtitles using the following parameters in the `SubtitleStyle()` class: 118 | 119 | * `font_name`: The font to use (e.g., "Roboto"). 120 | * `font_size`: The font size in pixels. 121 | * `spacing`: Character spacing in pixels. 122 | * `bold`: `True` for bold text, `False` otherwise. 
123 | * `italic`: `True` for italic text, `False` otherwise.
124 | * `underline`: `True` for underlined text, `False` otherwise.
125 | * `strike_out`: `True` for strikethrough text, `False` otherwise.
126 | 
127 | ```python
128 | from videodb import SubtitleStyle
129 | 
130 | stream_url = video.add_subtitle(
131 |     SubtitleStyle(
132 |         font_name="Roboto",
133 |         font_size=12,
134 |         spacing=0,
135 |         bold=False,
136 |         italic=False,
137 |         underline=False,
138 |         strike_out=False,
139 |     )
140 | )
141 | play_stream(stream_url)
142 | ```
143 | 
144 | Output should be a playable video within the notebook, directing to the VideoDB console player with the specified typography. Example:
145 | 
146 | ```
147 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/86d9e2a6-b0d9-4333-9013-bf355fea051d.m3u8'
148 | ```
149 | 
150 | ![](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/subtitle-style/typography.png)
151 | 
152 | ### 2. Color and Effects
153 | 
154 | Customize the colors of the subtitles using the following parameters:
155 | 
156 | * `primary_colour`: The main text color.
157 | * `secondary_colour`: Color for karaoke effects or secondary highlighting.
158 | * `outline_colour`: The text outline color.
159 | * `back_colour`: The subtitle background color.
160 | 
161 | > **ℹ️ Color Format**
162 | >
163 | > `SubtitleStyle` accepts colors in the `&HBBGGRR` hexadecimal format, where BB, GG, and RR represent the blue, green, and red components, respectively. The `&H` prefix is required. For transparency, include an alpha value at the beginning: `&HAABBGGRR` (AA is the alpha value).
164 | 
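The `&HBBGGRR` byte order is the reverse of the familiar `#RRGGBB`, which is easy to get wrong. The following helper is not part of the SDK, just a small sketch for converting between the two; the expected outputs match the values used in the color example below:

```python
def to_ass_color(rgb_hex: str, alpha: str = "00") -> str:
    """Convert '#RRGGBB' (plus an optional alpha byte) into the '&HAABBGGRR' format."""
    rgb_hex = rgb_hex.lstrip("#")
    r, g, b = rgb_hex[0:2], rgb_hex[2:4], rgb_hex[4:6]
    return f"&H{alpha}{b}{g}{r}".upper()

print(to_ass_color("#FFCFA5"))        # &H00A5CFFF, used as primary_colour below
print(to_ass_color("#3B3B3B", "80"))  # &H803B3B3B, used as back_colour below
```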
165 | ```python
166 | from videodb import SubtitleStyle
167 | 
168 | stream_url = video.add_subtitle(
169 |     SubtitleStyle(
170 |         primary_colour="&H00A5CFFF",
171 |         secondary_colour="&H00FFFF00",
172 |         outline_colour="&H000341C1",
173 |         back_colour="&H803B3B3B",
174 |     )
175 | )
176 | play_stream(stream_url)
177 | ```
178 | 
179 | Output should be a playable video within the notebook, directing to the VideoDB console player with the specified colors. Example:
180 | 
181 | ```
182 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/f59f13f4-d2ac-4589-83b7-58cdbb8e9154.m3u8'
183 | ```
184 | 
185 | ![](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/subtitle-style/colors.png)
186 | 
187 | ### 3. Position and Margins
188 | 
189 | Configure the alignment and position of the subtitles using the following parameters:
190 | 
191 | * `alignment`: The alignment of the subtitle (use `SubtitleAlignment` enum).
192 | * `margin_l`: Left margin in pixels.
193 | * `margin_r`: Right margin in pixels.
194 | * `margin_v`: Top and bottom margin in pixels.
195 | 
196 | > ℹ️ See the [API Reference](link_to_api_reference - *replace with actual link*) for details on `SubtitleAlignment`.
197 | 
198 | ```python
199 | from videodb import SubtitleStyle, SubtitleAlignment
200 | 
201 | stream_url = video.add_subtitle(
202 |     SubtitleStyle(
203 |         alignment=SubtitleAlignment.middle_center,
204 |         margin_l=10,
205 |         margin_r=10,
206 |         margin_v=20,
207 |     )
208 | )
209 | play_stream(stream_url)
210 | ```
211 | 
212 | Output should be a playable video within the notebook, directing to the VideoDB console player with the specified position and margins. Example:
213 | 
214 | ```
215 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/d32a4ae4-e19f-4ca9-9438-4d7b94e327b2.m3u8'
216 | ```
217 | 
218 | ![](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/subtitle-style/position.png)
219 | 
220 | ### 4. Text Transformation
221 | 
222 | Transform the text size and spacing using the following parameters:
223 | 
224 | * `scale_x`: Horizontal scaling factor.
225 | * `scale_y`: Vertical scaling factor.
226 | * `angle`: Rotation angle in degrees.
227 | 
228 | ```python
229 | from videodb import SubtitleStyle
230 | 
231 | stream_url = video.add_subtitle(
232 |     SubtitleStyle(
233 |         scale_x=1.5,
234 |         scale_y=3,
235 |         angle=0,
236 |     )
237 | )
238 | play_stream(stream_url)
239 | ```
240 | 
241 | Output should be a playable video within the notebook, directing to the VideoDB console player with the specified transformations. Example:
242 | 
243 | ```
244 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/f7ebe6d2-a181-46ad-aae3-e824446dc2a4.m3u8'
245 | ```
246 | 
247 | ![](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/subtitle-style/transformation.png)
248 | 
249 | ### 5. Borders and Shadow
250 | 
251 | Add border styles, outlines, and shadows using the following parameters:
252 | 
253 | * `border_style`: The border style (use `SubtitleBorderStyle` enum).
254 | * `outline`: The width of the text outline in pixels.
255 | * `shadow`: The depth of the shadow behind the text in pixels.
256 | 
257 | > ℹ️ See the [API Reference](link_to_api_reference - *replace with actual link*) for details on `SubtitleBorderStyle`.
258 | 
259 | ```python
260 | from videodb import SubtitleStyle, SubtitleBorderStyle
261 | 
262 | stream_url = video.add_subtitle(
263 |     SubtitleStyle(
264 |         shadow=2,
265 |         back_colour="&H00000000",
266 |         border_style=SubtitleBorderStyle.no_border,
267 |     )
268 | )
269 | play_stream(stream_url)
270 | ```
271 | 
272 | Output should be a playable video within the notebook, directing to the VideoDB console player with the specified border and shadow.
Example: 273 | 274 | ``` 275 | 'https://console.videodb.io/player?url=https://stream.videodb.io/v3/published/manifests/cbbc8812-0fcf-467f-aac6-1976582146bd.m3u8' 276 | ``` 277 | 278 | ![](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/subtitle-style/shadow.png) 279 | 280 | ## 👨‍💻 Next Steps 281 | 282 | --- 283 | 284 | Explore other VideoDB subtitle features and resources: 285 | 286 | * [Enhancing Video Captions with VideoDB Subtitle Styling](https://coda.io/d/_dnIYgjBK4eB/_sulRy) 287 | 288 | If you have any questions or feedback, feel free to reach out: 289 | 290 | * [Discord](https://discord.gg/py9P639jGz) 291 | * [GitHub](https://github.com/video-db) 292 | * [VideoDB](https://videodb.io) 293 | * Email: ashu@videodb.io 294 | 295 | 296 | --- 297 | 298 | -------------------------------------------------------------------------------- /context/examples/fragments/TextAsset.txt: -------------------------------------------------------------------------------- 1 | # IPYNB Notebook: TextAsset [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/guides/TextAsset.ipynb) 2 | 3 | ```python 4 | # @title Open In Colab 5 | # @markdown [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/video-db/videodb-cookbook/blob/nb/main/guides/asset/TextAsset.ipynb) 6 | 7 | # Guide: Text Assets 8 | 9 | ## Overview 10 | 11 | This guide introduces `TextAssets` and demonstrates how to overlay text elements on videos using VideoDB. We'll explore customizable configurations for `TextAssets`, including: 12 | 13 | * Default Styling 14 | * Font Styling 15 | * Background Box Styling 16 | * Text Shadowing 17 | * Position and Alignment 18 | 19 | ## Setup 20 | 21 | --- 22 | 23 | ### 📦 Installing the VideoDB Package 24 | 25 | ```python 26 | %pip install videodb 27 | ``` 28 | 29 | ### 🔑 API Key 30 | 31 | Before proceeding, ensure you have access to VideoDB. 32 | 33 | > Get your API key from the [VideoDB Console](https://console.videodb.io). (Free for the first 50 uploads, no credit card required! 🎉) 34 | 35 | ```python 36 | import os 37 | 38 | os.environ["VIDEO_DB_API_KEY"] = "" # @param {type:"string"} 39 | ``` 40 | 41 | ### 🌐 Connecting to VideoDB 42 | 43 | ```python 44 | from videodb import connect 45 | 46 | conn = connect() 47 | coll = conn.get_collection() 48 | ``` 49 | 50 | ### 🎥 Uploading a Video 51 | 52 | VideoDB utilizes videos as the foundation for creating timelines. For more information, refer to [Timelines and Assets](https://docs.videodb.io/timeline-and-assets-44). 53 | 54 | ```python 55 | video = coll.upload(url="https://www.youtube.com/watch?v=w4NEOTvstAc") 56 | video.play() 57 | ``` 58 | 59 | ## Creating Assets 60 | 61 | --- 62 | 63 | Now, let's create the assets that will be used in our video timeline: 64 | 65 | * `VideoAsset`: The base video for the timeline. 66 | * `TextAsset`: The text element to be overlaid on the video. 67 | 68 | > Checkout [Timeline and Assets](https://docs.videodb.io/timeline-and-assets-44) for conceptual understanding. 69 | 70 | ### 🎥 VideoAsset 71 | 72 | --- 73 | 74 | ```python 75 | from videodb.asset import VideoAsset 76 | 77 | # Create a VideoAsset from the uploaded video 78 | video_asset = VideoAsset(asset_id=video.id, start=0, end=60) 79 | ``` 80 | 81 | ### 🔠 TextAsset: Default Styling 82 | 83 | --- 84 | 85 | To create a `TextAsset`, use the `TextAsset` class. 86 | 87 | **Parameters:** 88 | 89 | * `text` (required): The text to be displayed. 
90 | * `duration` (optional): The duration (in seconds) for which the text element should be displayed. 91 | 92 | ```python 93 | from videodb.asset import TextAsset 94 | 95 | text_asset_1 = TextAsset(text="THIS IS A SENTENCE", duration=5) 96 | ``` 97 | 98 | ![Default Text Styling](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/default_style.png) 99 | 100 | ### 🔡 TextAsset: Custom Styling 101 | 102 | To create a `TextAsset` with custom styling, use the `style` parameter, which accepts a `TextStyle` instance. 103 | 104 | > View API Reference for [`TextStyle`](link to TextStyle documentation - if available) 105 | 106 | **1. Font Styling** 107 | 108 | ```python 109 | from videodb.asset import TextAsset, TextStyle 110 | 111 | # Create TextAsset with custom font styling using TextStyle 112 | text_asset_2 = TextAsset( 113 | text="THIS IS A SENTENCE", 114 | duration=5, 115 | style=TextStyle( 116 | font="Inter", 117 | fontsize=50, 118 | fontcolor="#FFCFA5", 119 | bordercolor="#C14103", 120 | borderw="2", 121 | box=False, 122 | ), 123 | ) 124 | ``` 125 | 126 | ![Font Styling](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/font_styling.png) 127 | 128 | **2. Configuring Background Box** 129 | 130 | ```python 131 | from videodb.asset import TextAsset, TextStyle 132 | 133 | # Create TextAsset with custom background box styling using TextStyle 134 | text_asset_3 = TextAsset( 135 | text="THIS IS A SENTENCE", 136 | duration=5, 137 | style=TextStyle( 138 | box=True, 139 | boxcolor="#FFCFA5", 140 | boxborderw=10, 141 | boxw=0, 142 | boxh=0, 143 | ), 144 | ) 145 | ``` 146 | 147 | ![Background Box Styling](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/background_box.png) 148 | 149 | **3. Configuring Shadows** 150 | 151 | ```python 152 | from videodb.asset import TextAsset, TextStyle 153 | 154 | # Create TextAsset with custom shadow styling using TextStyle 155 | text_asset_4 = TextAsset( 156 | text="THIS IS A SENTENCE", 157 | duration=5, 158 | style=TextStyle( 159 | shadowcolor="#0AA910", 160 | shadowx="2", 161 | shadowy="3", 162 | ), 163 | ) 164 | ``` 165 | 166 | ![Shadow Styling](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/custom_shadow.png) 167 | 168 | **4. 
Position and Alignment**
169 | 
170 | ```python
171 | from videodb.asset import TextAsset, TextStyle
172 | 
173 | text_asset_5 = TextAsset(
174 |     text="THIS IS A SENTENCE",
175 |     duration=5,
176 |     style=TextStyle(
177 |         x=50,
178 |         y=50,
179 |         y_align="text",
180 |         text_align="T+L",
181 |         boxcolor="#FFCFA5",
182 |         boxh=100,
183 |         boxw=600,
184 |     ),
185 | )
186 | 
187 | text_asset_6 = TextAsset(
188 |     text="THIS IS A SENTENCE",
189 |     duration=5,
190 |     style=TextStyle(
191 |         x=50,
192 |         y=50,
193 |         y_align="text",
194 |         text_align="M+C",
195 |         boxcolor="#FFCFA5",
196 |         boxh=100,
197 |         boxw=600,
198 |     ),
199 | )
200 | 
201 | text_asset_7 = TextAsset(
202 |     text="THIS IS A SENTENCE",
203 |     duration=5,
204 |     style=TextStyle(
205 |         x=50,
206 |         y=50,
207 |         y_align="text",
208 |         text_align="B+R",
209 |         boxcolor="#FFCFA5",
210 |         boxh=100,
211 |         boxw=600,
212 |     ),
213 | )
214 | ```
215 | 
216 | ![Text Alignment Top Left](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/text_align.png)
217 | ![Y Alignment](https://github.com/video-db/videodb-cookbook-assets/raw/main/images/guides/text-asset/y_align.png)
218 | 
219 | ## Viewing the Results
220 | 
221 | ---
222 | 
223 | ### 🎼 Creating a Timeline Using `Timeline`
224 | 
225 | ```python
226 | from videodb.timeline import Timeline
227 | 
228 | # Initialize a Timeline
229 | timeline = Timeline(conn)
230 | 
231 | # Add the base VideoAsset inline
232 | timeline.add_inline(video_asset)
233 | 
234 | # TextAsset with default Styling
235 | timeline.add_overlay(0, text_asset_1)
236 | 
237 | # TextAsset with Custom Font Styling
238 | timeline.add_overlay(5, text_asset_2)
239 | 
240 | # TextAsset with Custom Border Box
241 | timeline.add_overlay(10, text_asset_3)
242 | 
243 | # TextAsset with Custom Shadow
244 | timeline.add_overlay(15, text_asset_4)
245 | 
246 | # TextAsset with Custom Position and alignment
247 | timeline.add_overlay(20, text_asset_5)
248 | timeline.add_overlay(25, text_asset_6)
249 | timeline.add_overlay(30, text_asset_7)
250 | ```
251 | 
252 | ### ▶️ Playing the Video
253 | 
254 | ```python
255 | from videodb import play_stream
256 | 
257 | stream_url = timeline.generate_stream()
258 | play_stream(stream_url)
259 | ```
260 | 
261 | ---
262 | 
--------------------------------------------------------------------------------
/context/examples/fragments/scene_level_metadata_indexing.txt:
--------------------------------------------------------------------------------
 1 | # IPYNB Notebook: scene_level_metadata_indexing [Source Link](https://github.com/video-db/videodb-cookbook/blob/main/quickstart/scene_level_metadata_indexing.ipynb)
 2 | 
 3 | ```python
 4 | # 📌 VideoDB F1 Race Search Pipeline (Turn Detection & Metadata Filtering)
 5 | 
 6 | # 🎯 Objective
 7 | # This notebook demonstrates how to use scene-level metadata filtering to enable precise search and retrieval within an F1 race video.
 8 | 
 9 | # 🔍 What We’re Doing:
10 | # - Upload an F1 race video.
11 | # - Extract scenes every 2 seconds (1 frame per scene).
12 | # - Describe scenes using AI-generated metadata.
13 | # - Index scenes with structured metadata (`camera_view` & `action_type`).
14 | # - Search scenes using semantic search combined with metadata filtering.
15 | 
16 | # 📦 Install VideoDB SDK
17 | # Required for connecting and processing video data.
18 | ```
19 | 
20 | ```python
21 | !pip install videodb
22 | ```
23 | 
24 | ```python
25 | # 🔑 Set Up API Key
26 | # Authenticate with VideoDB to access indexing and search functionalities.
27 | import os
28 | 
29 | os.environ["VIDEO_DB_API_KEY"] = ""
30 | ```
31 | 
32 | ```python
33 | # 🌐 Connect to VideoDB
34 | # Establishes a connection to manage video storage, indexing, and search.
35 | from videodb import connect
36 | 
37 | conn = connect()
38 | coll = conn.get_collection()
39 | 
40 | print(coll.id)
41 | ```
42 | 
43 | ```python
44 | # 🎥 Upload F1 Race Video
45 | # Adds the video to VideoDB for further processing.
46 | video = coll.upload(url="https://www.youtube.com/watch?v=2-oslsgSaTI")
47 | print(video.id)
48 | ```
49 | 
50 | ```python
51 | # ✂️ Extracting Scenes (Every 2 Seconds)
52 | # We split the video into 2-second scenes, extracting a single frame per scene for indexing.
53 | from videodb import SceneExtractionType
54 | 
55 | scene_collection = video.extract_scenes(
56 |     extraction_type=SceneExtractionType.time_based,
57 |     extraction_config={"time": 2, "select_frames": ["middle"]},
58 | )
59 | 
60 | print(f"Scene Collection ID: {scene_collection.id}")
61 | 
62 | scenes = scene_collection.scenes
63 | 
64 | print(f"Total Scenes Extracted: {len(scenes)}")
65 | ```
66 | 
67 | ```python
68 | # 🔍 Generating Scene Metadata
69 | # To make scenes searchable, we use AI to describe & categorize each scene with the following structured metadata:
70 | 
71 | # 📌 Scene-Level Metadata Fields:
72 | # 1️⃣ `camera_view` → Where is the camera placed?
73 | #    - `"road_ahead"` → Driver’s POV looking forward.
74 | #    - `"helmet_selfie"` → Close-up of driver’s helmet.
75 | 
76 | # 2️⃣ `action_type` → What is the driver doing?
77 | #    - `"clear_road"` → No cars ahead (clean lap).
78 | #    - `"chasing"` → Following another car (intense racing moment).
79 | from videodb.scene import Scene
80 | 
81 | # List to store described scenes
82 | described_scenes = []
83 | 
84 | for scene in scenes:
85 |     print(f"Scene from {scene.start}s to {scene.end}s")
86 | 
87 |     # Generate metadata
88 |     camera_view = scene.describe(
89 |         'Select ONLY one of these camera views (DO NOT describe it, JUST return the category name): ["road_ahead", "helmet_selfie"]. If the view does not match exactly, pick the closest one.'
 90 |     )
 91 | 
 92 |     action_type = scene.describe(
 93 |         'Select ONLY one of these options based on the action being performed by the driver (DO NOT describe it, JUST return the category name): ["clear_road", "chasing"]. If the view does not match exactly, pick the closest one.'
 94 |     )
 95 | 
 96 |     scene_description = scene.describe(
 97 |         "Clearly describe a Formula 1 scene by specifying the scene type, the drivers and teams involved, the specific location on the track, and the key action or significance of the moment. Use concise, yet rich language, targeting Formula 1 enthusiasts seeking precise scene descriptions."
 98 |     )
 99 | 
100 |     print(f"Camera View: {camera_view} | Action Type: {action_type}")
101 |     print(f"Scene Description: {scene_description}")
102 | 
103 |     # Create Scene object with metadata
104 |     described_scene = Scene(
105 |         video_id=video.id,
106 |         start=scene.start,
107 |         end=scene.end,
108 |         description=scene_description,
109 |         metadata={
110 |             "camera_view": camera_view,
111 |             "action_type": action_type
112 |         }
113 |     )
114 |     described_scenes.append(described_scene)
115 | 
116 | print(f"Total Scenes Indexed: {len(described_scenes)}")
117 | ```
118 | 
119 | ```python
120 | # 🗂 Indexing Scenes with Metadata
121 | # Now that we have generated metadata for each scene, we index them to make them searchable.
122 | if described_scenes:
123 |     scene_index_id = video.index_scenes(
124 |         scenes=described_scenes,
125 |         name="F1 Scenes"
126 |     )
127 |     print(f"Scenes Indexed under ID: {scene_index_id}")
128 | ```
129 | 
130 | ```python
131 | # 🔎 Searching Scenes with Metadata & AI
132 | # Now that our scenes are indexed, we can search using a combination of:
133 | # ✅ Semantic Search → AI understands the meaning of the query.
134 | # ✅ Metadata Filters → Only return relevant scenes based on camera view & action type.
135 | 
136 | # 🔍 Example 1: Finding Intense Chasing Moments
137 | # Search for scenes where a driver is chasing another car, viewed from the driver's perspective.
138 | from videodb import IndexType
139 | from videodb import SearchType
140 | 
141 | search_results = video.search(
142 |     query = "A skillful chasing scene",
143 |     filter = [{"camera_view": "road_ahead"}, {"action_type": "chasing"}],  # Using metadata filter
144 |     search_type = SearchType.semantic,
145 |     index_type = IndexType.scene,
146 |     result_threshold = 100,
147 |     scene_index_id = scene_index_id  # Our indexed scenes
148 | )
149 | # Play the search results
150 | search_results.play()
151 | ```
152 | 
153 | ```python
154 | # 🔍 Example 2: Finding Smooth Solo Driving Moments
155 | # Search for scenes with clean, precise turns, where the driver has an open road ahead.
156 | search_results = video.search(
157 |     query = "Smooth turns",
158 |     filter = [{"camera_view": "road_ahead"}, {"action_type": "clear_road"}],  # Using metadata filter
159 |     search_type = SearchType.semantic,
160 |     index_type = IndexType.scene,
161 |     result_threshold = 100,
162 |     scene_index_id = scene_index_id
163 | )
164 | # Play the search results
165 | search_results.play()
166 | ```
167 | 
168 | ```python
169 | # ✅ Conclusion: Precision Search with Scene Metadata
170 | # This notebook demonstrated how scene-level metadata indexing enables powerful video search.
171 | 
172 | # We can:
173 | # - Precisely filter race footage by camera angles & driver actions.
174 | # - Use AI-powered semantic search to find specific race moments.
175 | # - Enhance video retrieval for F1 analysis, highlights, and research.
176 | 
177 | # This approach unlocks smarter, metadata-driven video search.
178 | ```
179 | 
180 | 
181 | ---
182 | 
--------------------------------------------------------------------------------
/context/examples/process_examples.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import fnmatch
 4 | 
 5 | import urllib.parse
 6 | import nbformat
 7 | from nbconvert import MarkdownExporter
 8 | import yaml
 9 | 
10 | import context.utils as utils  # Your custom LLM utilities
11 | 
12 | 
13 | def format_url(url: str) -> str:
14 |     return urllib.parse.quote(url, safe=":/?=&")
15 | 
16 | 
17 | def simplify_content_with_llm(prompt_text, text, llm):
18 |     """
19 |     Simplify Markdown content using an LLM.
20 | 
21 |     Parameters:
22 |         prompt_text (str): The prompt to guide the simplification.
23 |         text (str): The original Markdown content.
24 |         llm (str): The language model identifier to use.
25 | 
26 |     Returns:
27 |         str: The simplified Markdown content.
28 |     """
29 |     if not prompt_text:
30 |         prompt_text = """
31 |         Simplify the following Markdown content.
32 |         Remove fluff and keep only key technical details.
33 |         Remove any extraneous buttons or sections.
34 |         """
35 |     llm_output = utils.get_llm_output(prompt_text, text, llm=llm)
36 |     return llm_output["response"], llm_output["usage"]
37 | 
38 | 
39 | def load_config_yaml():
40 |     with open("config.yaml", "r", encoding="utf-8") as f:
41 |         return yaml.safe_load(f)
42 | 
43 | 
44 | class IPYNBHandler:
45 |     """
46 |     Handles the conversion and processing of Jupyter Notebook (.ipynb) files.
47 | """ 48 | 49 | def __init__( 50 | self, 51 | include_patterns, 52 | exclude_patterns, 53 | prompt_config, 54 | output_fragments, 55 | clone_dir, 56 | clone_url, 57 | llm, 58 | ): 59 | self.include_patterns = include_patterns 60 | self.exclude_patterns = exclude_patterns 61 | self.prompt_config = prompt_config 62 | self.output_fragments = output_fragments 63 | self.clone_dir = clone_dir 64 | self.clone_url = clone_url 65 | self.llm = llm 66 | 67 | def convert_ipynb_to_md(self, ipynb_file): 68 | """ 69 | Convert a Jupyter Notebook to Markdown using nbconvert. 70 | """ 71 | with open(ipynb_file, "r", encoding="utf-8") as f: 72 | notebook = nbformat.read(f, as_version=4) 73 | exporter = MarkdownExporter() 74 | md_content, _ = exporter.from_notebook_node(notebook) 75 | return md_content 76 | 77 | def get_prompt_for_ipynb(self, file_path): 78 | """ 79 | Determine which prompt to use based on the file path and prompt configuration. 80 | """ 81 | default_prompt = self.prompt_config.get("default_prompt", "") 82 | prompt_folder = self.prompt_config.get("prompt_folder", "") 83 | custom_prompts = self.prompt_config.get("custom_prompts", []) 84 | selected_prompt = default_prompt 85 | 86 | for entry in custom_prompts: 87 | pattern = entry.get("pattern") 88 | prompt_file = entry.get("prompt") 89 | if pattern and prompt_file: 90 | # Last matching prompt wins 91 | if fnmatch.fnmatch(file_path, f"{self.clone_dir}/{pattern}"): 92 | selected_prompt = prompt_file 93 | 94 | full_prompt_path = os.path.join(prompt_folder, selected_prompt) 95 | if os.path.exists(full_prompt_path): 96 | with open(full_prompt_path, "r", encoding="utf-8") as f: 97 | return f.read() 98 | else: 99 | print( 100 | f"Warning: Prompt file {full_prompt_path} not found. Using empty prompt." 101 | ) 102 | return "" 103 | 104 | def get_ipynb_files_from_globs(self): 105 | """ 106 | Expand include glob patterns into a list of .ipynb file paths 107 | and filter out files matching any exclude pattern. 108 | """ 109 | files = [] 110 | for pattern in self.include_patterns: 111 | matched = glob.glob(f"{self.clone_dir}/{pattern}", recursive=True) 112 | files.extend(matched) 113 | if self.exclude_patterns: 114 | filtered_files = [] 115 | for f in files: 116 | if any( 117 | fnmatch.fnmatch(f, f"{self.clone_dir}/{pat}") 118 | for pat in self.exclude_patterns 119 | ): 120 | continue 121 | filtered_files.append(f) 122 | files = filtered_files 123 | return files 124 | 125 | def process(self): 126 | """ 127 | Process each .ipynb file: 128 | - Convert to Markdown. 129 | - Simplify content via the LLM. 130 | - Append the result to a combined Markdown string. 131 | 132 | Returns: 133 | str: The combined Markdown content. 
134 | """ 135 | ipynb_files = self.get_ipynb_files_from_globs() 136 | output = "" 137 | total_tokens_used = 0 138 | 139 | for ipynb_file in ipynb_files: 140 | if not os.path.exists(ipynb_file): 141 | print(f"⚠ File not found: {ipynb_file}") 142 | continue 143 | 144 | print(f"Processing {ipynb_file}...") 145 | 146 | # Convert notebook to Markdown 147 | md_content = self.convert_ipynb_to_md(ipynb_file) 148 | prompt_text = self.get_prompt_for_ipynb(ipynb_file) 149 | simplified_content, tokens_used = simplify_content_with_llm( 150 | prompt_text, md_content, self.llm 151 | ) 152 | file_title = os.path.basename(ipynb_file).replace(".ipynb", "") 153 | print(f"💰 Tokens Used {tokens_used}") 154 | total_tokens_used += tokens_used 155 | source_link = ipynb_file.replace( 156 | self.clone_dir, f"{self.clone_url}/blob/main" 157 | ) 158 | ipynb_output = ( 159 | f"# IPYNB Notebook: {file_title} [Source Link]({format_url(source_link)})\n\n" 160 | + simplified_content 161 | + "\n\n---\n\n" 162 | ) 163 | if self.output_fragments: 164 | os.makedirs(self.output_fragments, exist_ok=True) 165 | file_name = f"{(file_title.replace('-', '_').replace(' ', '_').strip('/')) or 'index'}.txt" 166 | print("this is file_name", file_name) 167 | ipynb_output_file_path = os.path.join( 168 | self.output_fragments, 169 | file_name, 170 | ) 171 | with open(ipynb_output_file_path, "w") as f: 172 | f.write(ipynb_output) 173 | output += ipynb_output 174 | 175 | print(f" 💰 💰 Tokens Used : {total_tokens_used}") 176 | return output 177 | 178 | 179 | if __name__ == "__main__": 180 | # Load configuration from YAML 181 | config = load_config_yaml().get("examples_context", {}) 182 | clone_dir = config.get("clone_dir") 183 | clone_url = config.get("clone_url") 184 | 185 | # Retrieve the LLM parameter from the config; default to "gemini" if not provided. 186 | llm = config.get("llm", "gemini") 187 | 188 | # IPYNB configuration 189 | ipynb_include = config.get("include", []) 190 | ipynb_exclude = config.get("exclude", []) 191 | ipynb_prompts = config.get("prompts", {}) 192 | ipynb_output_file = config.get("output_file", "") 193 | ipynb_output_fragments = config.get("output_fragments", "") 194 | 195 | # Process IPYNB files 196 | ipynb_handler = IPYNBHandler( 197 | ipynb_include, 198 | ipynb_exclude, 199 | ipynb_prompts, 200 | ipynb_output_fragments, 201 | clone_dir, 202 | clone_url, 203 | llm, 204 | ) 205 | ipynb_content = ipynb_handler.process() 206 | 207 | # Save IPYNB content to the desired output file 208 | if ipynb_output_file: 209 | os.makedirs(os.path.dirname(ipynb_output_file), exist_ok=True) 210 | with open(ipynb_output_file, "w", encoding="utf-8") as f: 211 | f.write(ipynb_content) 212 | print(f"✔ IPYNB content saved in {ipynb_output_file}") 213 | -------------------------------------------------------------------------------- /context/instructions/prompt.md: -------------------------------------------------------------------------------- 1 | # Task 2 | --- 3 | You are an advanced assistant specialized in generating VideoDB Python code. You have deep knowledge of VideoDB's platform, SDK, and best practices. 4 | Your primary role is to assist users in integrating and utilizing the **"VideoDB" Python SDK** for their projects. You will be given: 5 | 6 | 1. **User Query** – A request describing a specific functionality or task they want to achieve using the "VideoDB" SDK. 7 | 2. **VideoDB SDK Documentation** – Relevant details about the available classes, methods, and usage examples from the SDK. 
8 | 9 | ### Your Responsibilities: 10 | - **Understand the User Query:** Analyze the user's request to determine the exact functionality they need. 11 | - **Use the SDK Documentation:** Extract the relevant details from the provided SDK documentation to construct an appropriate solution. 12 | - **Generate Python Code Only:** Return only the necessary Python code without any additional explanation or formatting. 13 | - **Use Comments Where Necessary:** Small inline comments can be added to improve code readability. 14 | - **Avoid Unnecessary Function Wrappers & Error handlers:** If the code is short and does not require functional abstraction, implement it as a simple, linear flow. 15 | 16 | ### RULES: 17 | - Latest version of Python SDK is 0.2.12 18 | - Stick to the provided SDK documentation; do not assume functionalities that are not explicitly mentioned. 19 | - When writing code only write Python code. Don't write code in any other language like Javascript 20 | - If the request is unclear, ask for clarification before proceeding. 21 | - Always use explicit imports; if a class from a submodule is used (VideoAsset, TextAsset), import it directly (from videodb.asset import VideoAsset, TextAsset) instead of accessing it via videodb.asset.ClassName. Avoid mixing import styles. 22 | 23 | # VideoDB Overview 24 | --- 25 | ## **What is VideoDB?** 26 | 27 | VideoDB is a **serverless video database** designed to treat videos as structured data rather than just files. It enables developers to: 28 | 29 | - **Store** videos in the cloud with automatic indexing. 30 | - **Search within videos** using AI-powered semantic search and speech-to-text indexing. 31 | - **Stream videos** dynamically, including full videos or specific clipped segments. 32 | - **Edit videos programmatically** using an API-driven timeline model (combine clips, overlay text/audio, etc.). 33 | - **Integrate videos into AI/ML workflows** for tasks like video-based retrieval-augmented generation (RAG) or automated highlight generation. 34 | 35 | VideoDB abstracts away complex video processing (such as transcoding, storage, and searching) and provides a **Python SDK** to interact with video data in a structured way. 36 | 37 | --- 38 | 39 | ## **Core Concepts in VideoDB** 40 | 41 | ### **1. Connecting to VideoDB** 42 | 43 | Before using VideoDB, developers establish a **connection** using an API key. This connection acts as an interface to manage videos and collections. Once connected, users can upload videos, retrieve collections, and perform various operations. 44 | 45 | ### **2. Uploading Videos** 46 | 47 | Videos can be uploaded from **local files**, **URLs**, or **cloud storage**. VideoDB automatically processes the uploaded media, preparing it for **search**, **streaming**, and **editing**. Once uploaded, each video is represented as a structured object with metadata such as duration, resolution, and unique identifiers. 48 | 49 | ### **3. Streaming and Clipping** 50 | 51 | Instead of dealing with raw video files, VideoDB allows **on-the-fly streaming**. Developers can: 52 | 53 | - Retrieve a **streaming URL** for a full video. 54 | - Generate a **clip** by specifying a start and end time. 55 | - Stitch together multiple segments dynamically for customized playback. 56 | 57 | This approach eliminates the need for manual video editing and re-encoding, as all operations are handled in real-time. 58 | 59 | ### **4. Searching Inside Videos** 60 | 61 | A key feature of VideoDB is **AI-powered search**. 
Videos are not just stored—they can be **indexed** for deep content search. This includes:
62 | 
63 | - **Speech-to-text indexing:** Automatically transcribe spoken words in videos.
64 | - **Semantic search:** Find relevant moments based on meaning, not just exact words.
65 | - **Scene-based indexing (upcoming):** Detect and search based on visual content.
66 | 
67 | Once indexed, users can **query** videos like a database, retrieving relevant segments instead of manually scrubbing through footage.
68 | 
69 | ### **5. Organizing Videos with Collections**
70 | 
71 | Videos in VideoDB belong to **collections**, which function like structured containers for organizing and searching multiple videos together. Developers can:
72 | 
73 | - Upload videos into a collection.
74 | - Search across all videos within a collection.
75 | - Retrieve and manage videos systematically.
76 | 
77 | Collections enable scalable **multi-video queries**, making it easy to build applications that analyze large video datasets.
78 | 
79 | ### **6. Editing & Composing Videos with the API**
80 | 
81 | VideoDB allows developers to create **dynamic video compositions** without modifying original files. Using a **timeline-based model**, users can:
82 | 
83 | - **Concatenate video segments** (stitching clips together).
84 | - **Overlay audio, images, or text** (e.g., adding subtitles or background music).
85 | - **Generate a stream of the final edited video** without rendering new files.
86 | 
87 | This makes VideoDB a powerful tool for AI-driven content generation, automated video summaries, and programmatic video editing.
88 | 
89 | ### **7. Integration with AI & Machine Learning**
90 | 
91 | VideoDB is built with **AI applications in mind**. It enables:
92 | 
93 | - **Retrieval-Augmented Generation (RAG) for video-based Q&A.**
94 | - **Automated video summarization** (extracting key moments).
95 | - **Interactive AI-powered video search and recommendation.**
96 | 
97 | By combining search, indexing, and streaming, VideoDB allows developers to create **intelligent video assistants** that can retrieve and generate video content on demand.
98 | 
99 | ---
100 | 
101 | ## **Your Role**
102 | 
103 | - Clearly explain these **concepts** to the user.
104 | - Use simple and structured explanations—no need for function signatures or code.
105 | - When referring to specific SDK features, describe their **purpose** rather than providing exact method details.
106 | - Avoid redundant explanations; focus on how VideoDB works conceptually.
107 | 
108 | A separate reference will provide function-level details when needed. Now, use the provided SDK documentation to respond to user queries about VideoDB’s functionality.
--------------------------------------------------------------------------------
/context/llms.md:
--------------------------------------------------------------------------------
1 | # Task
2 | VideoDB is a Video Database for your AI Applications
3 | 
4 | ### RULES:
5 | - Stick to the provided SDK documentation; do not assume functionalities that are not explicitly mentioned.
6 | - Return only Python code without external text.
7 | - If the request is unclear, ask for clarification before proceeding.
8 | - Always use explicit imports; if a class from a submodule is used (VideoAsset, TextAsset), import it directly (`from videodb.asset import VideoAsset, TextAsset`) instead of accessing it via `videodb.asset.ClassName`. Avoid mixing import styles.
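A minimal sketch of the import style these rules call for (it assumes only the `connect` entry point and the asset classes named above):

```python
from videodb import connect
from videodb.asset import VideoAsset, TextAsset  # explicit submodule imports, never videodb.asset.ClassName

conn = connect(api_key="YOUR_API_KEY")  # placeholder key
```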
9 | 
10 | ---
11 | 
12 | ## SDK
13 | [VideoDB Python SDK Documentation](https://video-db.github.io/agent-toolkit/context/sdk/context/index.md)
14 | *SDK structure, classes, and interface definitions*
15 | 
16 | ---
17 | 
18 | ## Documents
19 | [VideoDB Documentation](https://github.com/video-db/agent-toolkit/blob/main/context/docs/docs_context.md)
20 | *Compiled product documentation and usage examples*
21 | 
22 | ---
23 | 
24 | ## Notebooks
25 | [VideoDB Examples](https://video-db.github.io/agent-toolkit/context/examples/examples_context.md)
26 | *Practical notebook-based recipes and patterns*
27 | 
--------------------------------------------------------------------------------
/context/llms.txt:
--------------------------------------------------------------------------------
1 | # Task
2 | VideoDB is a Video Database for your AI Applications
3 | 
4 | ### RULES:
5 | - Stick to the provided SDK documentation; do not assume functionalities that are not explicitly mentioned.
6 | - Return only Python code without external text.
7 | - If the request is unclear, ask for clarification before proceeding.
8 | - Always use explicit imports; if a class from a submodule is used (VideoAsset, TextAsset), import it directly (`from videodb.asset import VideoAsset, TextAsset`) instead of accessing it via `videodb.asset.ClassName`. Avoid mixing import styles.
9 | 
10 | ---
11 | 
12 | ## SDK
13 | [VideoDB Python SDK Documentation](https://video-db.github.io/agent-toolkit/context/sdk/context/index.md)
14 | *SDK structure, classes, and interface definitions*
15 | 
16 | ---
17 | 
18 | ## Documents
19 | [VideoDB Documentation](https://github.com/video-db/agent-toolkit/blob/main/context/docs/docs_context.md)
20 | *Compiled product documentation and usage examples*
21 | 
22 | ---
23 | 
24 | ## Notebooks
25 | [VideoDB Examples](https://video-db.github.io/agent-toolkit/context/examples/examples_context.md)
26 | *Practical notebook-based recipes and patterns*
27 | 
--------------------------------------------------------------------------------
/context/merge_llms_full_txt.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | 
4 | 
5 | def load_config():
6 |     with open("config.yaml", "r", encoding="utf-8") as f:
7 |         return yaml.safe_load(f)
8 | 
9 | 
10 | def read_file(path):
11 |     if os.path.exists(path):
12 |         with open(path, "r", encoding="utf-8") as f:
13 |             return f.read()
14 |     else:
15 |         print(f"⚠ Warning: {path} not found.")
16 |         return ""
17 | 
18 | 
19 | def main():
20 |     config = load_config()
21 |     master_config = config.get("llms_full_txt_file", {})
22 | 
23 |     input_files = master_config.get("input_files", [])
24 |     layout = master_config.get("layout")
25 | 
26 |     # Handle both single output_file and multiple output_files
27 |     output_file_single = master_config.get("output_file")
28 |     output_files_list = master_config.get("output_files")
29 | 
30 |     print("output_file_single:", output_file_single)
31 |     print("output_files_list:", output_files_list)
32 | 
33 |     # If output_files is defined, use that; otherwise fall back to output_file
34 |     if output_files_list:
35 |         output_files = output_files_list
36 |     elif output_file_single:
37 |         output_files = [output_file_single]
38 |     else:
39 |         print("Error: No output file(s) specified in config.yaml under llms_full_txt_file.")
40 |         exit(1)
41 | 
42 |     # Read and combine the content from each input file
43 |     contents = [read_file(f.get("file_path")) for f in input_files]
44 | 
45 |     if layout:
46 |         # Replace placeholders {{FILE1}}, {{FILE2}}, etc.
with respective file content
47 |         result = layout
48 |         for i, content in enumerate(contents, start=1):
49 |             placeholder = f"{{{{FILE{i}}}}}"
50 |             result = result.replace(placeholder, content)
51 |     else:
52 |         # Simply join the files with a separator
53 |         result = "\n\n---\n\n".join(contents)
54 | 
55 |     # Write the combined result to each output file
56 |     for out_file in output_files:
57 |         os.makedirs(os.path.dirname(out_file.get("file_path")), exist_ok=True)
58 |         with open(out_file.get("file_path"), "w", encoding="utf-8") as f:
59 |             f.write(result)
60 |         print(f"✔ Master file updated at {out_file.get('file_path')}")
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     main()
--------------------------------------------------------------------------------
/context/merge_llms_txt.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | 
4 | 
5 | def load_config():
6 |     with open("config.yaml", "r", encoding="utf-8") as f:
7 |         return yaml.safe_load(f)
8 | 
9 | 
10 | def read_file(path):
11 |     if os.path.exists(path):
12 |         with open(path, "r", encoding="utf-8") as f:
13 |             return f.read()
14 |     else:
15 |         print(f"⚠ Warning: {path} not found.")
16 |         return ""
17 | 
18 | 
19 | def main():
20 |     config = load_config()
21 |     master_config = config.get("llms_full_file", {})
22 | 
23 |     input_files = master_config.get("input_files", [])
24 |     layout = master_config.get("layout")
25 | 
26 |     # Handle both single output_file and multiple output_files
27 |     output_file_single = master_config.get("output_file")
28 |     output_files_list = master_config.get("output_files")
29 | 
30 |     print("output_file_single:", output_file_single)
31 |     print("output_files_list:", output_files_list)
32 | 
33 |     # If output_files is defined, use that; otherwise fall back to output_file
34 |     if output_files_list:
35 |         output_files = output_files_list
36 |     elif output_file_single:
37 |         output_files = [output_file_single]
38 |     else:
39 |         print("Error: No output file(s) specified in config.yaml under llms_full_file.")
40 |         exit(1)
41 | 
42 |     # Read and combine the content from each input file
43 |     contents = [read_file(f.get("file_path")) for f in input_files]
44 | 
45 |     if layout:
46 |         # Replace placeholders {{FILE1}}, {{FILE2}}, etc.
with respective file content
47 |         result = layout
48 |         for i, content in enumerate(contents, start=1):
49 |             placeholder = f"{{{{FILE{i}}}}}"
50 |             result = result.replace(placeholder, content)
51 |     else:
52 |         # Simply join the files with a separator
53 |         result = "\n\n---\n\n".join(contents)
54 | 
55 |     # Write the combined result to each output file
56 |     for out_file in output_files:
57 |         os.makedirs(os.path.dirname(out_file.get("file_path")), exist_ok=True)
58 |         with open(out_file.get("file_path"), "w", encoding="utf-8") as f:
59 |             f.write(result)
60 |         print(f"✔ Master file updated at {out_file.get('file_path')}")
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     main()
--------------------------------------------------------------------------------
/context/prompts/custom_1.txt:
--------------------------------------------------------------------------------
1 | Ignore the text below and return the following text
2 | " This was processed through custom_1.txt"
--------------------------------------------------------------------------------
/context/prompts/custom_2.txt:
--------------------------------------------------------------------------------
1 | Ignore the text below and return the following text
2 | " This was processed through custom_2.txt"
--------------------------------------------------------------------------------
/context/prompts/default_docs.txt:
--------------------------------------------------------------------------------
1 | Clean up the text given below and return a cleaned-up version;
2 | don't add any additional text or explanatory notes from your end.
3 | 
--------------------------------------------------------------------------------
/context/prompts/default_ipynb.txt:
--------------------------------------------------------------------------------
1 | Trim the fluff and enhance the clarity of the given notebook. Remove unnecessary content, improve readability, and ensure a polished final version.
--------------------------------------------------------------------------------
/context/prompts/refine_docs.txt:
--------------------------------------------------------------------------------
1 | Remove redundant information from the text below.
2 | Don't touch heading 1.
--------------------------------------------------------------------------------
/context/prompts/refine_ipynb.txt:
--------------------------------------------------------------------------------
1 | Remove redundant information from the text below.
2 | Don't touch heading 1.
3 | 
--------------------------------------------------------------------------------
/context/sdk/sphinx_config/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
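# Together with the sphinx_markdown_builder extension configured below, this file
# renders the SDK reference to Markdown for the agent context files under context/sdk/.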
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 | 
6 | # -- Project information -----------------------------------------------------
7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
8 | 
9 | project = "videodb"
10 | copyright = "2025, videodb"
11 | author = "videodb"
12 | 
13 | # -- General configuration ---------------------------------------------------
14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
15 | 
16 | import os
17 | import sys
18 | 
19 | sys.path.insert(0, os.path.abspath("../source"))  # Adjust path to SDK
20 | 
21 | # Output format
22 | markdown_builder = {
23 |     "output_format": "md",
24 | }
25 | 
26 | 
27 | extensions = [
28 |     "sphinx.ext.autodoc",  # Extracts docstrings
29 |     "sphinx.ext.napoleon",  # Google-style and NumPy-style docstrings
30 |     "myst_parser",  # Allows Markdown support
31 |     "sphinx_markdown_builder",  # Exports Markdown output
32 | ]
33 | 
34 | 
35 | templates_path = ["_templates"]
36 | exclude_patterns = []
37 | 
38 | 
39 | # -- Options for HTML output -------------------------------------------------
40 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
41 | 
42 | html_theme = "alabaster"
43 | html_static_path = ["_static"]
44 | 
--------------------------------------------------------------------------------
/context/sdk/sphinx_config/index.rst:
--------------------------------------------------------------------------------
1 | VideoDB Python SDK
2 | ===================
3 | 
4 | The VideoDB Python SDK is a Python library for interacting with `VideoDB <https://videodb.io>`_.
5 | Generate API keys at https://console.videodb.io
6 | 
7 | The following are submodules of the VideoDB Python SDK:
8 | -------------------------------------------------------
9 | 
10 | 
11 | VideoDB Python module metadata
12 | -------------------------------------------------------
13 | .. automodule:: videodb.__about__
14 |     :members:
15 |     :show-inheritance:
16 |     :undoc-members:
17 |     :special-members: __version__
18 | 
19 | Default Module videodb (`from videodb import class, func`)
20 | ----------------------------------------------------------
21 | 
22 | .. autofunction:: videodb.connect
23 | 
24 | .. autofunction:: videodb.play_stream
25 | 
26 | .. autoclass:: videodb.VideodbError
27 |     :members:
28 | 
29 | .. autoclass:: videodb.AuthenticationError
30 |     :members:
31 | 
32 | .. autoclass:: videodb.InvalidRequestError
33 |     :members:
34 | 
35 | .. autoclass:: videodb.SearchError
36 |     :members:
37 | 
38 | .. data:: videodb.VIDEO_DB_API
39 | 
40 | .. autoclass:: videodb.IndexType
41 |     :members:
42 |     :undoc-members:
43 | 
44 | .. autoclass:: videodb.MediaType
45 |     :members:
46 |     :undoc-members:
47 | 
48 | .. autoclass:: videodb.SearchType
49 |     :members:
50 |     :undoc-members:
51 | 
52 | .. autoclass:: videodb.SceneExtractionType
53 |     :members:
54 |     :undoc-members:
55 | 
56 | .. autoclass:: videodb.Segmenter
57 |     :members:
58 |     :undoc-members:
59 | 
60 | .. autoclass:: videodb.SubtitleAlignment
61 |     :members:
62 |     :undoc-members:
63 | 
64 | .. autoclass:: videodb.SubtitleBorderStyle
65 |     :members:
66 |     :undoc-members:
67 | 
68 | .. autoclass:: videodb.SubtitleStyle
69 |     :members:
70 | 
71 | ..
autoclass:: videodb.TextStyle 72 | :members: 73 | 74 | 75 | Module : videodb.client (`from videodb.client import class, func`) 76 | ------------------------------------------------------------------ 77 | 78 | .. automodule:: videodb.client 79 | :members: 80 | :special-members: __init__ 81 | :show-inheritance: 82 | :undoc-members: 83 | 84 | Module : videodb.collection (`from videodb.collection import class, func`) 85 | -------------------------------------------------------------------------- 86 | 87 | .. automodule:: videodb.collection 88 | :members: 89 | :special-members: __init__ 90 | :show-inheritance: 91 | :undoc-members: 92 | 93 | Module : videodb.video (`from videodb.video import class, func`) 94 | ------------------------------------------------------------------ 95 | 96 | .. automodule:: videodb.video 97 | :members: 98 | :show-inheritance: 99 | :undoc-members: 100 | 101 | 102 | Module : videodb.audio (`from videodb.audio import class, func`) 103 | ------------------------------------------------------------------ 104 | 105 | .. automodule:: videodb.audio 106 | :members: 107 | :show-inheritance: 108 | :undoc-members: 109 | 110 | 111 | 112 | Module : videodb.image (`from videodb.image import class, func`) 113 | ------------------------------------------------------------------ 114 | 115 | .. automodule:: videodb.image 116 | :members: 117 | :show-inheritance: 118 | :undoc-members: 119 | 120 | Module : videodb.timeline (`from videodb.timeline import class, func`) 121 | ---------------------------------------------------------------------- 122 | 123 | .. automodule:: videodb.timeline 124 | :members: 125 | :show-inheritance: 126 | :undoc-members: 127 | 128 | 129 | Module : videodb.asset (`from videodb.asset import class, func`) 130 | ------------------------------------------------------------------ 131 | 132 | .. automodule:: videodb.asset 133 | :members: 134 | :show-inheritance: 135 | :undoc-members: 136 | 137 | Module : videodb.scene (`from videodb.scene import class, func`) 138 | ------------------------------------------------------------------ 139 | 140 | .. automodule:: videodb.scene 141 | :members: 142 | :show-inheritance: 143 | :undoc-members: 144 | 145 | Module : videodb.search (`from videodb.search import class, func`) 146 | ------------------------------------------------------------------ 147 | 148 | .. autoclass:: videodb.search.SearchResult 149 | :members: 150 | :show-inheritance: 151 | :undoc-members: 152 | 153 | Module : videodb.shot (`from videodb.shot import class, func`) 154 | ------------------------------------------------------------------ 155 | 156 | .. 
automodule:: videodb.shot 157 | :members: 158 | :show-inheritance: 159 | :undoc-members: 160 | 161 | 162 | -------------------------------------------------------------------------------- /context/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import logging 4 | from openai import OpenAI 5 | from google import genai 6 | 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | PROJECT_ID = os.environ.get("PROJECT_ID") 12 | GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") 13 | OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") 14 | 15 | 16 | class OpenAIModels: 17 | gpt4o = "gpt-4o" 18 | gpt4omini = "gpt-4o-mini" 19 | o1 = "o1" 20 | o1mini = "o1-mini" 21 | o3mini = "o3-mini" 22 | o1preview = "o1-preview" 23 | 24 | 25 | class GeminiModels: 26 | gemini_2_0_flash = "gemini-2.0-flash" 27 | gemini_1_5_flash = "gemini-1.5-flash" 28 | gemini_1_5_pro = "gemini-1.5-pro" 29 | 30 | 31 | class ClaudeModels: 32 | claude_3_7_sonnet = "claude-3-7-sonnet" 33 | 34 | 35 | 36 | openai_model = OpenAIModels.gpt4o 37 | gemini_model = GeminiModels.gemini_2_0_flash 38 | claude_model = ClaudeModels.claude_3_7_sonnet 39 | 40 | # genai.configure(api_key=GEMINI_API_KEY) 41 | # gemini_client = genai.GenerativeModel(gemini_model) 42 | # claude_client = genai.GenerativeModel(claude_model) 43 | openai_client = OpenAI(api_key=OPENAI_API_KEY) 44 | gemini_client = genai.Client(api_key=GEMINI_API_KEY) 45 | claude_client = genai.Client(api_key=GEMINI_API_KEY) 46 | # claude_client = genai.Client(project=PROJECT_ID, location="us-central1", vertexai=True) 47 | 48 | 49 | def load_file(file_path): 50 | """Reads and returns the content of a file.""" 51 | with open(file_path, "r") as f: 52 | return f.read() 53 | 54 | 55 | def write_file(file_path, content): 56 | """Writes content to a file.""" 57 | with open(file_path, "w") as f: 58 | f.write(content) 59 | 60 | 61 | def create_directory(path): 62 | """Creates a directory if it doesn't exist.""" 63 | if not os.path.exists(path): 64 | os.makedirs(path) 65 | 66 | 67 | def call_openai(prompt): 68 | """Calls the OpenAI API with the provided prompt.""" 69 | messages = [{"role": "user", "content": prompt}] 70 | print(f"Running Query with OPENAI LLM {openai_model}") 71 | response = openai_client.chat.completions.create( 72 | model=openai_model, 73 | messages=messages, 74 | ) 75 | return { 76 | "model_name": response.model, 77 | "usage": response.usage.total_tokens, 78 | "response": response.choices[0].message.content.strip(), 79 | } 80 | 81 | 82 | def call_gemini(prompt): 83 | print(f"Running Query with GEMINI LLM {gemini_model}") 84 | response = gemini_client.models.generate_content( 85 | model=gemini_model, contents=[prompt] 86 | ) 87 | return { 88 | "model_name": response.model_version, 89 | "usage": response.usage_metadata.total_token_count, 90 | "response": response.text, 91 | } 92 | 93 | 94 | def call_claude(prompt): 95 | print(f"Running Query with Claude LLM {claude_model}") 96 | response = claude_client.models.generate_content( 97 | model=claude_model, contents=[prompt] 98 | ) 99 | return { 100 | "model_name": response.model_version, 101 | "usage": response.usage_metadata.total_token_count, 102 | "response": response.text, 103 | } 104 | 105 | 106 | def get_llm_output(context, user_prompt, llm="openai"): 107 | """Combines the context and user prompts, then gets output code from the chosen LLM.""" 108 | full_prompt = context + "\n\n" + user_prompt 109 | if llm == "openai": 110 | return 
call_openai(full_prompt)
111 |     elif llm == "gemini":
112 |         return call_gemini(full_prompt)
113 |     elif llm == "claude":
114 |         return call_claude(full_prompt)
115 |     else:
116 |         raise ValueError("Unsupported LLM: choose 'openai', 'gemini' or 'claude'")
117 | 
118 | 
119 | def compare_snippets(snippet1, snippet2, llm="openai"):
120 |     context = """
121 |     You are given two snippets of code.
122 |     You need to compare both of them in terms of similarity and functionality.
123 |     Give a score between -1 and 1, with -1 being totally different and 1 being most similar; keep the number precision up to 2 decimals.
124 |     Return no additional text, just the resulting number.
125 |     """
126 | 
127 |     user_prompt = f"""
128 |     #### SNIPPET 1
129 |     {snippet1}
130 | 
131 |     ### SNIPPET 2
132 |     {snippet2}
133 |     """
134 | 
135 |     return get_llm_output(context, user_prompt, llm)
136 | 
137 | 
138 | def get_embedding(text, provider="openai"):
139 |     """Gets an embedding vector for the given text using the specified provider."""
140 |     if provider == "openai":
141 |         embedding_response = openai_client.embeddings.create(
142 |             model="text-embedding-ada-002",
143 |             input=text,
144 |         )
145 |         return embedding_response.data[0].embedding
146 | 
147 | 
148 | def cosine_similarity(vec1, vec2):
149 |     """Computes cosine similarity between two vectors."""
150 |     vec1 = np.array(vec1)
151 |     vec2 = np.array(vec2)
152 |     return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
153 | 
--------------------------------------------------------------------------------
/modelcontextprotocol/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 | 
--------------------------------------------------------------------------------
/modelcontextprotocol/Dockerfile:
--------------------------------------------------------------------------------
1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
2 | FROM python:3.12-slim
3 | 
4 | WORKDIR /app
5 | 
6 | # Copy requirements and installer files
7 | COPY pyproject.toml uv.lock ./
8 | 
9 | # Copy whole project directory
10 | COPY . .
11 | 
12 | # Install build dependencies
13 | RUN pip install --upgrade pip \
14 |     && pip install --no-cache-dir .
15 | 
16 | EXPOSE 8000
17 | 
18 | # Command to run the server
19 | CMD [ "python", "-m", "videodb_director_mcp.main", "--api-key", "dummy-key" ]
20 | 
--------------------------------------------------------------------------------
/modelcontextprotocol/README.md:
--------------------------------------------------------------------------------
1 | [![smithery badge](https://smithery.ai/badge/@video-db/agent-toolkit)](https://smithery.ai/server/@video-db/agent-toolkit)
2 | 
3 | To add the MCP server in any config-driven MCP client, here is how the command and arguments will look:
4 | 
5 | ## Install `uv`
6 | We need to install `uv` first.
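(`uv` is Astral's fast Python package manager; it ships the `uvx` command used throughout this README to run the server in an isolated, throwaway environment.)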
7 | 
8 | For macOS/Linux:
9 | ```
10 | curl -LsSf https://astral.sh/uv/install.sh | sh
11 | ```
12 | For Windows:
13 | 
14 | ```
15 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
16 | ```
17 | 
18 | You can also visit the installation steps of `uv` for more details [here](https://docs.astral.sh/uv/getting-started/installation).
19 | 
20 | ## Run the MCP Server
21 | You can run the MCP server with `uvx` using the following command:
22 | 
23 | ```
24 | uvx videodb-director-mcp --api-key=VIDEODB_API_KEY
25 | ```
26 | 
27 | ## Add the VideoDB Director MCP Server in your favorite Client
28 | 
29 | ### Claude Desktop
30 | 
31 | To configure the VideoDB Director MCP server in Claude, you can run the following command:
32 | 
33 | ```uvx videodb-director-mcp --install=claude```
34 | 
35 | You can manually configure the MCP Server by following these steps:
36 | 
37 | 1. Open the `claude_desktop_config.json` file
38 | 
39 | On macOS/Linux:
40 | 
41 | ```
42 | code ~/Library/Application\ Support/Claude/claude_desktop_config.json
43 | ```
44 | 
45 | On Windows:
46 | 
47 | ```
48 | code $env:AppData\Claude\claude_desktop_config.json
49 | ```
50 | 
51 | 2. Add the VideoDB Director MCP Server inside the `mcpServers` key:
52 | 
53 | ```json
54 | {
55 |   "mcpServers": {
56 |     "videodb-director": {
57 |       "command": "uvx",
58 |       "args": ["videodb-director-mcp", "--api-key="]
59 |     }
60 |   }
61 | }
62 | ```
63 | 
64 | ### Cursor
65 | 
66 | To configure the VideoDB Director MCP server in Cursor, you can run the following command:
67 | 
68 | ```uvx videodb-director-mcp --install=cursor```
69 | 
70 | You can manually configure the MCP Server by following these steps:
71 | 
72 | 1. Inside Cursor, go to **Settings > Cursor Settings**
73 | 2. Click on **MCP**
74 | 3. Click on **Add new Global MCP Server**
75 | 4. Add the VideoDB Director MCP Server under the `mcpServers` key
76 | 
77 | ```json
78 | {
79 |   "mcpServers": {
80 |     "videodb-director": {
81 |       "command": "uvx",
82 |       "args": ["videodb-director-mcp", "--api-key="]
83 |     }
84 |   }
85 | }
86 | ```
87 | 
88 | ### Install in Claude and Cursor at the same time
89 | You can configure the VideoDB Director MCP server in Claude and Cursor together by running the following command:
90 | 
91 | ```
92 | uvx videodb-director-mcp --install=all
93 | ```
94 | 
95 | 
96 | ### Install for Claude Code
97 | 
98 | ```
99 | claude mcp add videodb-director uvx -- videodb-director-mcp --api-key=
100 | ```
101 | 
102 | ## Update VideoDB Director MCP package
103 | 
104 | To ensure you're using the latest version of the MCP server with `uvx`, start by clearing the cache:
105 | 
106 | ```
107 | uv cache clean
108 | ```
109 | 
110 | This command removes any outdated cached packages of `videodb-director-mcp`, allowing `uvx` to fetch the most recent version.
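You can also pin a specific release instead of always tracking the latest; for example (0.1.6 matches the version in this repo's `pyproject.toml`; substitute whichever release you need):

```
uvx videodb-director-mcp@0.1.6 --api-key=VIDEODB_API_KEY
```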
111 | 112 | If you always want to use the latest version of the MCP server, update your command as follows: 113 | ``` 114 | uvx videodb-director-mcp@latest --api-key= 115 | ``` 116 | -------------------------------------------------------------------------------- /modelcontextprotocol/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "videodb-director-mcp" 3 | version = "0.1.6" 4 | description = "VideoDB MCP Server" 5 | readme = "README.md" 6 | requires-python = ">=3.12" 7 | dependencies = [ 8 | "mcp[cli]>=1.3.0", 9 | "python-socketio>=5.12.1", 10 | "requests>=2.32.3", 11 | "websocket-client>=1.8.0", 12 | ] 13 | 14 | [project.scripts] 15 | videodb-director-mcp = "videodb_director_mcp.main:main" 16 | -------------------------------------------------------------------------------- /modelcontextprotocol/smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: 9 | - apiKey 10 | properties: 11 | apiKey: 12 | type: string 13 | description: API key to authorize the MCP server 14 | commandFunction: 15 | # A JS function that produces the CLI command based on the given config to start the MCP on stdio. 16 | |- 17 | (config) => ({ 18 | command: 'python', 19 | args: ['-m', 'videodb_director_mcp.main', '--api-key', config.apiKey] 20 | }) 21 | exampleConfig: 22 | apiKey: dummy-VIDEODB_API_KEY 23 | -------------------------------------------------------------------------------- /modelcontextprotocol/videodb_director_mcp/__init__.py: -------------------------------------------------------------------------------- 1 | from videodb_director_mcp.main import ( 2 | call_director, 3 | play_video, 4 | code_assistant, 5 | doc_assistant, 6 | ) 7 | 8 | 9 | __all__ = ["call_director", "play_video", "code_assistant", "doc_assistant"] 10 | -------------------------------------------------------------------------------- /modelcontextprotocol/videodb_director_mcp/cli_commands.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import platform 4 | from pathlib import Path 5 | 6 | def get_api_key() -> str: 7 | """Prompt the user for their VideoDB MCP API key.""" 8 | print("\n🔑 VideoDB API Key is required for advanced functionality.") 9 | print("🌐 Get your API key at: https://console.videodb.io/dashboard") 10 | print("Note: You can leave this empty, but the 'call_director' tool will not be available.\n") 11 | 12 | return input("Enter your VideoDB API Key (or press Enter to skip): ").strip() 13 | 14 | 15 | def load_config(config_path: Path) -> dict: 16 | """Load configuration from a file, returning an empty dict if unreadable or missing.""" 17 | config_path.parent.mkdir(parents=True, exist_ok=True) 18 | if config_path.exists(): 19 | try: 20 | with config_path.open("r", encoding="utf-8") as f: 21 | return json.load(f) 22 | except json.JSONDecodeError: 23 | print(f"⚠️ Failed to parse JSON config at: {config_path} — falling back to empty config.") 24 | return {} 25 | print(f"📁 No config file found at: {config_path} — creating a new one.") 26 | return {} 27 | 28 | 29 | def save_config(config_path: Path, config_data: dict) -> None: 30 | """Save configuration data to a file.""" 31 | with config_path.open("w", encoding="utf-8") 
as f:
32 |         json.dump(config_data, f, indent=4)
33 | 
34 | 
35 | def get_config_path(app: str) -> Path:
36 |     """Return the appropriate config path based on platform and app."""
37 |     if platform.system() == "Windows":
38 |         if app == "claude":
39 |             return Path(os.getenv("APPDATA")) / "Claude" / "claude_desktop_config.json"
40 |         return Path(os.getenv("USERPROFILE")) / ".cursor" / "mcp.json"
41 |     else:
42 |         if app == "claude":
43 |             return Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json"
44 |         return Path.home() / ".cursor" / "mcp.json"
45 | 
46 | 
47 | def create_mcp_entry(api_key: str, stdio: bool = False) -> dict:
48 |     """Create the MCP server entry config."""
49 |     entry = {
50 |         "command": "uvx",
51 |         "args": ["videodb-director-mcp"],
52 |     }
53 |     if api_key:
54 |         entry["env"] = {"VIDEODB_API_KEY": api_key}
55 |     if stdio:
56 |         entry["type"] = "stdio"
57 |     return entry
58 | 
59 | 
60 | def save_mcp_config(app: str, api_key: str) -> None:
61 |     """Save MCP config for Claude or Cursor."""
62 |     config_path = get_config_path(app)
63 |     config_data = load_config(config_path)
64 |     mcp_servers = config_data.get("mcpServers", {})
65 |     mcp_servers["videodb-director"] = create_mcp_entry(api_key, stdio=(app == "cursor"))
66 |     config_data["mcpServers"] = mcp_servers
67 | 
68 |     save_config(config_path, config_data)
69 | 
70 | 
71 | def install_for_claude() -> None:
72 |     """Install videodb-director-mcp for Claude Desktop."""
73 |     api_key = get_api_key()
74 |     save_mcp_config("claude", api_key)
75 |     print("\n🎉 Claude Desktop configuration complete!")
76 |     print("🔁 Please restart Claude Desktop to apply changes.")
77 |     print("🚀 Enjoy supercharged video capabilities!\n")
78 | 
79 | 
80 | def install_for_cursor() -> None:
81 |     """Install videodb-director-mcp for Cursor IDE."""
82 |     api_key = get_api_key()
83 |     save_mcp_config("cursor", api_key)
84 |     print("\n🎉 Cursor IDE configuration complete!")
85 |     print("✅ videodb-director is now available in Cursor.")
86 |     print("🚀 Enjoy supercharged video capabilities!\n")
87 | 
88 | 
89 | def install_for_all() -> None:
90 |     """Install videodb-director-mcp for all supported clients (currently Claude Desktop and Cursor IDE)."""
91 |     api_key = get_api_key()
92 |     save_mcp_config("claude", api_key)
93 |     save_mcp_config("cursor", api_key)
94 |     print("\n🎉 Configuration for Claude and Cursor completed successfully!")
95 |     print("🔁 Please restart Claude Desktop to apply changes.")
96 |     print("🚀 Enjoy supercharged video capabilities!\n")
97 | 
--------------------------------------------------------------------------------
/modelcontextprotocol/videodb_director_mcp/constants.py:
--------------------------------------------------------------------------------
1 | CODE_ASSISTANT_TXT_URL = "https://videodb.io/llms-full.txt"
2 | 
3 | DOCS_ASSISTANT_TXT_URL = "https://video-db.github.io/agent-toolkit/context/docs/docs_context.md"
4 | 
5 | DIRECTOR_API = "https://api2.director.videodb.io"
6 | 
7 | DIRECTOR_CALL_DESCRIPTION = """
8 | The Director tool orchestrates specialized agents within the VideoDB server, efficiently handling multimedia and video-related queries. Clients should send queries that Director can interpret clearly, specifying tasks in natural language. Director will then delegate these queries to appropriate agents for optimized results, utilizing defaults and contextual information if explicit parameters are not provided.
9 | 
10 | Director handles queries such as:
11 | 
12 | - Uploading & Downloading:
13 |   - Upload media from URLs or local paths (supported media: video, audio, image)
14 |   - Download VideoDB-generated video streams
15 | 
16 | - Indexing & Search:
17 |   - Index spoken words or scenes in videos (spoken_words, scene indexing; scene indexing supports shot or time-based type)
18 |   - Search VideoDB collections semantically or by keyword (semantic, keyword search; indexing types: spoken_word, scene)
19 | 
20 | - Summarization & Subtitles:
21 |   - Summarize video content based on custom prompts
22 |   - Add subtitles in various languages
23 | 
24 | - Dubbing:
25 |   - Dub videos into target languages
26 | 
27 | - Creating Videos:
28 |   - Generate videos using specific models or engines (Fal, StabilityAI; job types: text_to_video, image_to_video)
29 |   - Compare multiple video generation models (video_generation_comparison)
30 | 
31 | - Audio Generation & Editing:
32 |   - Generate speech, sound effects, or background music (engines: ElevenLabs for speech/sound effects, Beatoven for music)
33 |   - Clone voices from audio sources or overlay cloned voices onto videos
34 |   - Censor a video based on a given prompt
35 | 
36 | - Image and Frame Generation:
37 |   - Generate static image frames from videos at specified timestamps
38 |   - Create or enhance images using GenAI models (job types: text_to_image, image_to_image using Fal, Replicate)
39 | 
40 | - Video Editing & Clip Generation:
41 |   - Edit or combine multiple videos and audio files
42 |   - Generate targeted video clips from user prompts
43 | 
44 | - Streaming & Web Search:
45 |   - Stream videos by video ID or URL
46 |   - Search for relevant online videos (engine: Serp)
47 | 
48 | - Transcription:
49 |   - Generate transcripts for videos
50 | 
51 | - Pricing & Usage Information:
52 |   - Provide detailed cost information and usage estimates
53 | 
54 | Clients should provide queries clearly aligned with Director's capabilities, allowing Director to use contextual defaults when explicit parameters like IDs or collection details are not specified.
55 | 
56 | IMPORTANT: If you have a previous response from this method with an appropriate session_id, please provide that session_id in the next request to continue the conversation.
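For illustration (identifiers here are invented): a first call with text_message="Index the spoken words in my latest video" may return a response carrying session_id "sess-123"; a follow-up call with text_message="Now add English subtitles" and session_id="sess-123" continues that same conversation.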
57 | IMPORTANT: It is MANDATORY to send the `session_id` param if any earlier response from this method exists with a `session_id` in its output 58 | """.strip() 59 | -------------------------------------------------------------------------------- /modelcontextprotocol/videodb_director_mcp/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import threading 3 | import time 4 | import uuid 5 | import requests 6 | import argparse 7 | import socketio 8 | import webbrowser 9 | from typing import Any 10 | from mcp.server.fastmcp import FastMCP 11 | from videodb_director_mcp.cli_commands import ( 12 | install_for_claude, 13 | install_for_cursor, 14 | install_for_all, 15 | ) 16 | from videodb_director_mcp.constants import ( 17 | CODE_ASSISTANT_TXT_URL, 18 | DOCS_ASSISTANT_TXT_URL, 19 | DIRECTOR_CALL_DESCRIPTION, 20 | DIRECTOR_API, 21 | ) 22 | 23 | 24 | mcp = FastMCP("videodb-director") 25 | 26 | 27 | @mcp.resource( 28 | "videodb://doc_assistant", 29 | name="doc_assistant", 30 | description="Context for creating video applications using VideoDB", 31 | ) 32 | def doc_assistant() -> str: 33 | try: 34 | response = requests.get(DOCS_ASSISTANT_TXT_URL) 35 | response.raise_for_status() 36 | return response.text 37 | except requests.exceptions.RequestException as e: 38 | return f"Error: Unable to fetch data from URL. Details: {str(e)}" 39 | 40 | 41 | @mcp.tool( 42 | name="doc_assistant", 43 | description="Context for creating video applications using VideoDB", 44 | ) 45 | def doc_assistant() -> str: 46 | try: 47 | response = requests.get(DOCS_ASSISTANT_TXT_URL) 48 | response.raise_for_status() 49 | return response.text 50 | except requests.exceptions.RequestException as e: 51 | return f"Error: Unable to fetch data from URL. Details: {str(e)}" 52 | 53 | 54 | @mcp.resource( 55 | "videodb://code_assistant", 56 | name="code_assistant", 57 | description="Context for creating video applications using VideoDB", 58 | ) 59 | def code_assistant() -> str: 60 | try: 61 | response = requests.get(CODE_ASSISTANT_TXT_URL) 62 | response.raise_for_status() 63 | return response.text 64 | except requests.exceptions.RequestException as e: 65 | return f"Error: Unable to fetch data from URL. Details: {str(e)}" 66 | 67 | 68 | @mcp.tool( 69 | name="code_assistant", 70 | description="Will give you data related to VideoDB SDK which allows developers to use videodb in python. IMPORTANT: Whenever user wants to write code related to videos, youtube videos or VideoDB specifically, always call this tool.", 71 | ) 72 | def code_assistant() -> str: 73 | try: 74 | response = requests.get(CODE_ASSISTANT_TXT_URL) 75 | response.raise_for_status() 76 | return response.text 77 | except requests.exceptions.RequestException as e: 78 | return f"Error: Unable to fetch data from URL. Details: {str(e)}" 79 | 80 | 81 | @mcp.tool( 82 | name="play_video", 83 | description="Play the video of the given stream link", 84 | ) 85 | async def play_video(stream_link: str) -> dict[str, Any]: 86 | webbrowser.open(f"https://console.videodb.io/player?url={stream_link}") 87 | return {"message": "Opening VideoDB in browser"} 88 | 89 | 90 | @mcp.tool(name="call_director", description=DIRECTOR_CALL_DESCRIPTION) 91 | async def call_director( 92 | text_message: str, session_id: str | None = None, agents: list[str] = [] 93 | ) -> dict[str, Any]: 94 | """ 95 | Orchestrates specialized agents within the VideoDB server to efficiently handle multimedia and video-related queries. 
96 | 
97 |     Args:
98 |         text_message (str): The natural language query that Director will interpret and delegate to appropriate agents.
99 |         session_id (str | None, optional): A session identifier to maintain continuity across multiple requests. If a previous response from this method included a `session_id`, it is MANDATORY to include it in subsequent requests.
        agents (list[str], optional): Names of specific agents to route the query to; when empty, Director selects agents automatically.
100 |     """
101 |     api_key = os.getenv("VIDEODB_API_KEY")
102 |     if not api_key:
103 |         raise RuntimeError(
104 |             "Missing VIDEODB_API_KEY environment variable. Please set it before calling this function."
105 |         )
106 |     url = DIRECTOR_API
107 |     timeout = 300
108 |     headers = {"x-access-token": api_key}
109 |     sio = socketio.Client()
110 |     response_data = None
111 |     response_event = threading.Event()
112 | 
113 |     def on_connect():
114 |         message = {
115 |             "msg_type": "input",
116 |             "sender": "user",
117 |             "conv_id": str(int(time.time() * 1000)),
118 |             "msg_id": str(int(time.time() * 1000) + 1),
119 |             "session_id": session_id if session_id else str(uuid.uuid4()),
120 |             "content": [{"type": "text", "text": text_message}],
121 |             "agents": agents,
122 |             "collection_id": "default",
123 |         }
124 |         sio.emit("chat", message, namespace="/chat")
125 | 
126 |     def on_message(data):
127 |         nonlocal response_data
128 |         if isinstance(data, dict) and data.get("status") != "progress":
129 |             response_data = data
130 |             response_event.set()
131 | 
132 |     sio.on("connect", on_connect, namespace="/chat")
133 |     sio.on("chat", on_message, namespace="/chat")
134 | 
135 |     try:
136 |         sio.connect(
137 |             url,
138 |             namespaces=["/", "/chat"],
139 |             headers=headers,
140 |             wait=True,
141 |             wait_timeout=10,
142 |             retry=True
143 |         )
144 |         received = response_event.wait(timeout=timeout)
145 |     except Exception as e:
146 |         return {"error": f"Connection failed: {e}"}
147 |     finally:
148 |         sio.disconnect()
149 | 
150 |     return response_data if received else {"error": "Timeout waiting for response"}
151 | 
152 | 
153 | def parse_arguments():
154 |     parser = argparse.ArgumentParser(description="Run the VideoDB MCP server.")
155 |     parser.add_argument(
156 |         "--api-key",
157 |         type=str,
158 |         help="🔑 The VideoDB API key required to connect to the VideoDB service.",
159 |     )
160 |     parser.add_argument(
161 |         "--install",
162 |         choices=["claude", "cursor", "all"],
163 |         help="🔧 Configure the MCP server in 'claude' and/or 'cursor'.",
164 |     )
165 |     return parser.parse_args()
166 | 
167 | 
168 | def main():
169 |     args = parse_arguments()
170 | 
171 |     if args.install == "claude":
172 |         install_for_claude()
173 |         return
174 | 
175 |     if args.install == "cursor":
176 |         install_for_cursor()
177 |         return
178 | 
179 |     if args.install == "all":
180 |         install_for_all()
181 |         return
182 | 
183 |     if args.api_key:
184 |         os.environ["VIDEODB_API_KEY"] = args.api_key
185 | 
186 |     mcp.run(transport="stdio")
187 | 
188 | 
189 | if __name__ == "__main__":
190 |     main()
191 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "videodb_helper"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.9"
7 | dependencies = [
8 |     "boto3>=1.37.3",
9 |     "firecrawl-py>=1.14.1",
10 |     "gitignore-parser>=0.1.11",
11 |     "google-genai>=1.4.0",
12 |     "matplotlib>=3.9.4",
13 |     "myst-parser>=3.0.1",
14 |     "nbconvert>=7.16.6",
15 |     "numpy>=2.0.2",
16 |     "openai>=1.63.2",
17 |     "python-dotenv>=1.0.1",
18 |     "pyyaml>=6.0.2",
19 | 
"rich>=13.9.4", 20 | "sphinx>=7.4.7", 21 | "sphinx-markdown-builder>=0.6.8", 22 | "tiktoken>=0.9.0", 23 | "tree-sitter-python>=0.23.0", 24 | ] 25 | 26 | [tool.setuptools] 27 | packages = ["context", "modelcontextprotocol"] 28 | -------------------------------------------------------------------------------- /readme_shields.json: -------------------------------------------------------------------------------- 1 | { 2 | "schemaVersion": 1, 3 | "label": "llms-full.txt token length", 4 | "message": "30.9k", 5 | "color": "blue" 6 | } -------------------------------------------------------------------------------- /token_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/video-db/agent-toolkit/030bba81cbe63fdd1728796c50ffe9d750daad31/token_breakdown.png --------------------------------------------------------------------------------