├── .gitignore
├── README.md
├── app
    ├── __init__.py
    ├── bd_api.py
    ├── database.py
    ├── main.py
    ├── models.py
    ├── parse_sponsor.py
    ├── process_pool_manager.py
    ├── schemas.py
    └── sponsor_worker.py
├── env.sample
├── frontend
    ├── .gitignore
    ├── README.md
    ├── eslint.config.js
    ├── index.html
    ├── package-lock.json
    ├── package.json
    ├── public
    │   └── vite.svg
    ├── src
    │   ├── App.css
    │   ├── App.tsx
    │   ├── assets
    │   │   └── react.svg
    │   ├── components
    │   │   └── PendingChannels.tsx
    │   ├── index.css
    │   ├── main.tsx
    │   ├── pages
    │   │   ├── VideoDetail.tsx
    │   │   ├── VideoInput.tsx
    │   │   └── VideoList.tsx
    │   └── vite-env.d.ts
    ├── tsconfig.app.json
    ├── tsconfig.json
    ├── tsconfig.node.json
    └── vite.config.ts
├── requirements.txt
└── sql_app.db


/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | env/
3 | __pycache__/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Video Scraping API
 2 | 
 3 | A FastAPI-based backend for video scraping and analysis.
 4 | 
 5 | ## Setup
 6 | 
 7 | 1. Create a virtual environment:
 8 | ```bash
 9 | python -m venv venv
10 | source venv/bin/activate  # On Windows: venv\Scripts\activate
11 | ```
12 | 
13 | 2. Install dependencies:
14 | ```bash
15 | pip install -r requirements.txt
16 | ```
17 | 
18 | 3. Run the application:
19 | ```bash
20 | uvicorn app.main:app --reload
21 | ```
22 | 
23 | The API will be available at `http://localhost:8000`
24 | 
25 | ## API Documentation
26 | 
27 | After starting the server, visit `http://localhost:8000/docs` for the interactive API documentation.
28 | 
29 | ### Endpoints
30 | 
31 | - `POST /scrape-videos/`: Submit URLs for video scraping
32 | - `GET /videos/`: Get paginated list of videos
33 | - `GET /videos/{video_id}`: Get detailed information about a specific video
34 | 
35 | ## Models
36 | 
37 | - Video: Base video information
38 | - Metadata: Video transcript and metadata
39 | - VideoSummary: Generated summary of the video
40 | - VideoSponsor: Sponsorship information and brand mentions
41 | 


--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/app/bd_api.py:
--------------------------------------------------------------------------------
  1 | import httpx
  2 | from typing import Dict, List
  3 | from sqlalchemy.orm import Session
  4 | import asyncio
  5 | from . import models
  6 | import os
  7 | from dotenv import load_dotenv
  8 | from .process_pool_manager import get_pool_manager
  9 | 
 10 | load_dotenv()
 11 | 
 12 | API_ENDPOINT = "https://api.brightdata.com/datasets/v3"
 13 | VIDEO_DATASET_ID = "gd_lk56epmy2i5g7lzu0k"
 14 | SNAPSHOT_POLL_INTERVAL = 5  # seconds
 15 | MAX_RETRIES = 200  # 1 minute total polling time
 16 | API_KEY = os.getenv("BD_API_KEY")
 17 | 
 18 | async def check_progress(client: httpx.AsyncClient, headers: Dict[str, str], snapshot: str) -> bool:
 19 |     """Check if the scraping process is complete."""
 20 |     response = await client.get(
 21 |         f"{API_ENDPOINT}/progress/{snapshot}",
 22 |         headers=headers
 23 |     )
 24 |     response.raise_for_status()
 25 |     progress_data = response.json()
 26 |     print(f"Progress: {progress_data}, Snapshot: {snapshot}")
 27 |     
 28 |     # Check if all items are processed
 29 |     return progress_data.get("status") == "ready"
 30 | 
 31 | async def get_snapshot_data(client: httpx.AsyncClient, headers: Dict[str, str], snapshot: str) -> Dict:
 32 |     """Poll for snapshot data until it's ready or max retries reached."""
 33 |     for _ in range(MAX_RETRIES):
 34 |         # First check progress
 35 |         is_complete = await check_progress(client, headers, snapshot)
 36 |         if not is_complete:
 37 |             await asyncio.sleep(SNAPSHOT_POLL_INTERVAL)
 38 |             continue
 39 | 
 40 |         # If complete, get snapshot data
 41 |         response = await client.get(
 42 |             f"{API_ENDPOINT}/snapshot/{snapshot}",
 43 |             headers=headers,
 44 |             params={"format": "json"}
 45 |         )
 46 |         response.raise_for_status()
 47 |         data = response.json()
 48 |         
 49 |         if isinstance(data, list) and len(data) > 0:
 50 |             return data
 51 |             
 52 |         # Wait before next poll if data not ready
 53 |         await asyncio.sleep(SNAPSHOT_POLL_INTERVAL)
 54 |     
 55 |     raise TimeoutError("Snapshot data not ready after maximum retries")
 56 | 
 57 | async def process_video_data(video_data: Dict, video: models.Video, db: Session) -> None:
 58 |     try:
 59 |         # Update video metadata
 60 |         metadata = models.Metadata(
 61 |             video_id=video.id,
 62 |             metadata_json=video_data,
 63 |             creator=video_data.get("youtuber"),
 64 |         )
 65 |         print(video_data.get("youtuber"))
 66 |         db.add(metadata)
 67 |         video.status = "completed"
 68 |         db.commit()
 69 | 
 70 |         # Start sponsor processing using the process pool
 71 |         if video_data.get("transcript") or video_data.get("description"):
 72 |             # Get database URL from current session
 73 |             db_url = db.get_bind().url.render_as_string(hide_password=False)
 74 |             
 75 |             # Add task to the process pool
 76 |             pool_manager = get_pool_manager()
 77 |             pool_manager.add_task(
 78 |                 video.id,
 79 |                 video_data.get("transcript", ""),
 80 |                 video_data.get("description", ""),
 81 |                 db_url
 82 |             )
 83 | 
 84 |     except Exception as e:
 85 |         video.status = "failed"
 86 |         db.commit()
 87 |         print(f"Error processing video data: {str(e)}")
 88 | 
 89 | async def scrape_videos(urls: List[str], db: Session, type: str = "video") -> List[models.Video]:
 90 |     if not API_KEY:
 91 |         raise ValueError("BD_API_KEY environment variable not set")
 92 | 
 93 |     # Prepare the request payload
 94 |     if type == "video":
 95 |         payload = [{"url": url} for url in urls]
 96 |     else:
 97 |         payload = [{"url": url, "num_of_posts": 50} for url in urls]
 98 |     
 99 |     headers = {
100 |         "Authorization": f"Bearer {API_KEY}",
101 |         "Content-Type": "application/json"
102 |     }
103 | 
104 |     try:
105 |         async with httpx.AsyncClient() as client:
106 |             # Step 1: Trigger the scraping
107 |             if type == "video":
108 |                 response = await client.post(
109 |                     f"{API_ENDPOINT}/trigger",
110 |                     params={"dataset_id": VIDEO_DATASET_ID, "include_errors": "true"},
111 |                     headers=headers,
112 |                     json=payload
113 |                 )
114 |             else:
115 |                 response = await client.post(
116 |                     f"{API_ENDPOINT}/trigger",
117 |                     params={"dataset_id": VIDEO_DATASET_ID, "include_errors": "true", "type": "discover_new", "discover_by": "url"},
118 |                     headers=headers,
119 |                     json=payload
120 |                 )
121 |             response.raise_for_status()
122 |             snapshot = response.json().get("snapshot_id")
123 |             
124 |             # Step 2: Poll for progress and get snapshot data
125 |             video_data_list = await get_snapshot_data(client, headers, snapshot)
126 | 
127 |             if type == "channel":
128 |                 for url in urls:
129 |                     channel = db.query(models.Channel).filter(models.Channel.url == url).first()
130 |                     if channel:
131 |                         channel.status = "completed"
132 |                         db.commit()
133 |             
134 |             # Process videos one by one to avoid transaction conflicts
135 |             videos = []
136 |             for video_data in video_data_list:
137 |                 # Check if video already exists
138 |                 video = db.query(models.Video).filter(models.Video.url == video_data["url"]).first()
139 |                 if not video:
140 |                     # Only create new video if it doesn't exist
141 |                     video = models.Video(url=video_data["url"])
142 |                     db.add(video)
143 |                     db.flush()  # Flush to get the video ID
144 |                 
145 |                 # Always update the video data
146 |                 video.status = "pending"
147 |                 await process_video_data(video_data, video, db)
148 |                 videos.append(video)
149 |             
150 |             return videos
151 |                 
152 |     except Exception as e:
153 |         print(f"HTTP error occurred: {e}")
154 |         if type == "channel":
155 |             for url in urls:
156 |                 channel = db.query(models.Channel).filter(models.Channel.url == url).first()
157 |                 if channel:
158 |                     channel.status = "failed"
159 |                     db.commit()
160 |         raise
161 | 


--------------------------------------------------------------------------------
/app/database.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import create_engine
 2 | from sqlalchemy.ext.declarative import declarative_base
 3 | from sqlalchemy.orm import sessionmaker
 4 | 
 5 | SQLALCHEMY_DATABASE_URL = "sqlite:///./sql_app.db"
 6 | 
 7 | engine = create_engine(
 8 |     SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
 9 | )
10 | SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
11 | 
12 | Base = declarative_base()
13 | 
14 | def get_db():
15 |     db = SessionLocal()
16 |     try:
17 |         yield db
18 |     finally:
19 |         db.close()
20 | 


--------------------------------------------------------------------------------
/app/main.py:
--------------------------------------------------------------------------------
  1 | from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
  2 | from sqlalchemy.orm import Session, joinedload
  3 | from typing import List, Union
  4 | from . import models, schemas, bd_api
  5 | from .database import engine, get_db
  6 | import json
  7 | from sqlalchemy import String, text
  8 | import logging
  9 | 
 10 | # Set up logging
 11 | logging.basicConfig(level=logging.INFO)
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | models.Base.metadata.create_all(bind=engine)
 15 | 
 16 | app = FastAPI()
 17 | 
 18 | def model_to_dict(obj):
 19 |     if obj is None:
 20 |         return None
 21 |     
 22 |     if not hasattr(obj, '__table__'):
 23 |         return obj
 24 |     
 25 |     result = {}
 26 |     for column in obj.__table__.columns:
 27 |         value = getattr(obj, column.name)
 28 |         result[column.name] = value
 29 |     
 30 |     # Handle relationships
 31 |     if hasattr(obj, 'video_metadata') and obj.video_metadata:
 32 |         result['video_metadata'] = model_to_dict(obj.video_metadata)
 33 |     if hasattr(obj, 'summary') and obj.summary:
 34 |         result['summary'] = model_to_dict(obj.summary)
 35 |     if hasattr(obj, 'sponsor') and obj.sponsor:
 36 |         sponsor_dict = model_to_dict(obj.sponsor)
 37 |         if sponsor_dict and 'brands_mentioned' in sponsor_dict:
 38 |             try:
 39 |                 if isinstance(sponsor_dict['brands_mentioned'], str):
 40 |                     sponsor_dict['brands_mentioned'] = json.loads(sponsor_dict['brands_mentioned'])
 41 |             except:
 42 |                 sponsor_dict['brands_mentioned'] = []
 43 |         result['sponsor'] = sponsor_dict
 44 |     
 45 |     return result
 46 | 
 47 | async def process_videos_background(urls: List[str], db: Session, type: str = "video"):
 48 |     try:
 49 |         await bd_api.scrape_videos(urls, db, type)
 50 |     except Exception as e:
 51 |         # Get videos and update their status to Failed
 52 |         for url in urls:
 53 |             if type == "channel":
 54 |                 channel = db.query(models.Channel).filter(models.Channel.url == url).first()
 55 |                 if channel:
 56 |                     channel.status = "failed"
 57 |             else:
 58 |                 video = db.query(models.Video).filter(models.Video.url == url).first()
 59 |                 if video:
 60 |                     video.status = "failed"
 61 |         db.commit()
 62 |         print(f"Error processing videos: {str(e)}")
 63 | 
 64 | async def handle_urls(urls: List[str], db: Session, background_tasks: BackgroundTasks, scrape_type: str = "video"):
 65 |     results = []
 66 |     
 67 |     for url in urls:
 68 |         if scrape_type == "channel":
 69 |             # Handle channel URL
 70 |             existing_channel = db.query(models.Channel).filter(models.Channel.url == url).first()
 71 |             if existing_channel:
 72 |                 # Update status if it was previously failed or completed
 73 |                 if existing_channel.status in ["failed", "completed"]:
 74 |                     existing_channel.status = "pending"
 75 |                     db.commit()
 76 |                 results.append(existing_channel)
 77 |             else:
 78 |                 channel = models.Channel(url=url)
 79 |                 db.add(channel)
 80 |                 try:
 81 |                     db.commit()
 82 |                     results.append(channel)
 83 |                 except:
 84 |                     db.rollback()
 85 |                     raise HTTPException(status_code=500, detail="Error adding channel to database")
 86 |         else:
 87 |             # Handle video URL
 88 |             existing_video = db.query(models.Video).filter(models.Video.url == url).first()
 89 |             if existing_video:
 90 |                 # Update status if it was previously failed or completed
 91 |                 if existing_video.status in ["failed", "completed"]:
 92 |                     existing_video.status = "pending"
 93 |                     db.commit()
 94 |                 results.append(existing_video)
 95 |             else:
 96 |                 video = models.Video(url=url)
 97 |                 db.add(video)
 98 |                 try:
 99 |                     db.commit()
100 |                     results.append(video)
101 |                 except:
102 |                     db.rollback()
103 |                     raise HTTPException(status_code=500, detail="Error adding video to database")
104 |     
105 |     # Process in background
106 |     if results:
107 |         background_tasks.add_task(process_videos_background, urls, db, scrape_type)
108 |     
109 |     # Return appropriate schema based on type
110 |     if scrape_type == "channel":
111 |         return [schemas.Channel(**model_to_dict(result)) for result in results]
112 |     else:
113 |         return [schemas.Video(**model_to_dict(result)) for result in results]
114 | 
115 | @app.post("/scrape-videos/", response_model=Union[List[schemas.Video], List[schemas.Channel]])
116 | async def scrape_videos(
117 |     request: schemas.ScrapeVideosRequest, 
118 |     background_tasks: BackgroundTasks,
119 |     db: Session = Depends(get_db)
120 | ):
121 |     return await handle_urls(request.urls, db, background_tasks, request.scrape_type)
122 | 
123 | @app.get("/videos/", response_model=schemas.VideoList)
124 | async def get_videos(
125 |     page: int = 1, 
126 |     size: int = 10, 
127 |     sponsor_filter: str = None,  
128 |     sponsor_name: str = None,    
129 |     creator: str = None,         
130 |     sort_by: str = None,         
131 |     sort_order: str = "asc",     
132 |     db: Session = Depends(get_db)
133 | ):
134 |     # Calculate offset
135 |     offset = (page - 1) * size
136 | 
137 |     # Create base subquery for IDs only
138 |     subquery = db.query(models.Video.id).select_from(models.Video)
139 |     metadata_joined = False
140 |     sponsor_joined = False
141 | 
142 |     # Apply filters to subquery
143 |     if sponsor_filter or sponsor_name:
144 |         subquery = subquery.join(models.VideoSponsor)
145 |         sponsor_joined = True
146 |         if sponsor_filter == "sponsored":
147 |             subquery = subquery.filter(models.VideoSponsor.is_sponsored == True)
148 |         elif sponsor_filter == "not_sponsored":
149 |             subquery = subquery.filter(models.VideoSponsor.is_sponsored == False)
150 | 
151 |     if sponsor_name:
152 |         if not sponsor_joined:
153 |             subquery = subquery.join(models.VideoSponsor)
154 |             sponsor_joined = True
155 |         subquery = subquery.filter(
156 |             models.VideoSponsor.brands_mentioned.cast(String).ilike(f'%{sponsor_name}%')
157 |         )
158 | 
159 |     if creator:
160 |         subquery = subquery.join(models.Metadata)
161 |         metadata_joined = True
162 |         subquery = subquery.filter(models.Metadata.creator.ilike(f'%{creator}%'))
163 | 
164 |     # Apply sorting to subquery
165 |     if sort_by == "creator":
166 |         if not metadata_joined:
167 |             subquery = subquery.join(models.Metadata)
168 |             metadata_joined = True
169 |         if sort_order == "desc":
170 |             subquery = subquery.order_by(models.Metadata.creator.desc())
171 |         else:
172 |             subquery = subquery.order_by(models.Metadata.creator.asc())
173 |     else:
174 |         if sort_order == "desc":
175 |             subquery = subquery.order_by(models.Video.created_at.desc())
176 |         else:
177 |             subquery = subquery.order_by(models.Video.created_at.asc())
178 | 
179 |     # Apply pagination to subquery
180 |     subquery = subquery.offset(offset).limit(size)
181 | 
182 |     # Get total count
183 |     total_query = db.query(models.Video.id).select_from(models.Video)
184 |     metadata_joined = False
185 |     sponsor_joined = False
186 | 
187 |     if sponsor_filter or sponsor_name:
188 |         total_query = total_query.join(models.VideoSponsor)
189 |         sponsor_joined = True
190 |         if sponsor_filter == "sponsored":
191 |             total_query = total_query.filter(models.VideoSponsor.is_sponsored == True)
192 |         elif sponsor_filter == "not_sponsored":
193 |             total_query = total_query.filter(models.VideoSponsor.is_sponsored == False)
194 |         if sponsor_name:
195 |             total_query = total_query.filter(
196 |                 models.VideoSponsor.brands_mentioned.cast(String).ilike(f'%{sponsor_name}%')
197 |             )
198 | 
199 |     if creator:
200 |         if not metadata_joined:
201 |             total_query = total_query.join(models.Metadata)
202 |             metadata_joined = True
203 |         total_query = total_query.filter(models.Metadata.creator.ilike(f'%{creator}%'))
204 | 
205 |     total = total_query.count()
206 | 
207 |     # Get the actual videos with their relationships
208 |     videos = db.query(models.Video).options(
209 |         joinedload(models.Video.sponsor),
210 |         joinedload(models.Video.video_metadata)
211 |     ).filter(
212 |         models.Video.id.in_(subquery)
213 |     ).order_by(
214 |         models.Video.created_at.asc() if sort_order == "asc" else models.Video.created_at.desc()
215 |     ).all()
216 | 
217 |     # Convert to schema format
218 |     video_list = []
219 |     for video in videos:
220 |         try:
221 |             video_dict = model_to_dict(video)
222 |             video_schema = schemas.Video(**video_dict)
223 |             video_list.append(video_schema)
224 |         except Exception as e:
225 |             logger.error(f"Error converting video ID {video.id} to schema: {str(e)}")
226 |             continue
227 | 
228 |     return schemas.VideoList(
229 |         items=video_list,
230 |         total=total,
231 |         page=page,
232 |         size=size
233 |     )
234 | 
235 | @app.get("/videos/{video_id}", response_model=schemas.Video)
236 | async def get_video(video_id: int, db: Session = Depends(get_db)):
237 |     video = db.query(models.Video).options(
238 |         joinedload(models.Video.video_metadata),
239 |         joinedload(models.Video.summary),
240 |         joinedload(models.Video.sponsor)
241 |     ).filter(models.Video.id == video_id).first()
242 |     
243 |     if video is None:
244 |         raise HTTPException(status_code=404, detail="Video not found")
245 |     
246 |     return schemas.Video(**model_to_dict(video))
247 | 
248 | @app.get("/pending-channels/", response_model=List[schemas.Channel])
249 | async def get_pending_channels(db: Session = Depends(get_db)):
250 |     channels = db.query(models.Channel).filter(
251 |         models.Channel.status == "pending"
252 |     ).order_by(models.Channel.created_at.desc()).all()
253 |     
254 |     return [schemas.Channel(**model_to_dict(channel)) for channel in channels]
255 | 


--------------------------------------------------------------------------------
/app/models.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON, Boolean
 2 | from sqlalchemy.orm import relationship
 3 | from .database import Base
 4 | from datetime import datetime
 5 | 
 6 | class Video(Base):
 7 |     __tablename__ = "videos"
 8 | 
 9 |     id = Column(Integer, primary_key=True, index=True)
10 |     url = Column(String, index=True)
11 |     created_at = Column(DateTime, default=datetime.now)
12 |     status = Column(String, default="pending")
13 | 
14 |     # Relationships for additional processing
15 |     video_metadata = relationship("Metadata", back_populates="video", uselist=False)
16 |     summary = relationship("VideoSummary", back_populates="video", uselist=False)
17 |     sponsor = relationship("VideoSponsor", back_populates="video", uselist=False)
18 | 
19 | class Metadata(Base):
20 |     __tablename__ = "metadata"
21 | 
22 |     id = Column(Integer, primary_key=True, index=True)
23 |     video_id = Column(Integer, ForeignKey("videos.id"))
24 |     created_at = Column(DateTime, default=datetime.now)
25 |     status = Column(String, default="pending")
26 |     creator = Column(String, index=True, nullable=True)
27 |     metadata_json = Column(JSON, nullable=True)  # Store all Bright Data response as JSON
28 | 
29 |     video = relationship("Video", back_populates="video_metadata")
30 | 
31 | class VideoSummary(Base):
32 |     __tablename__ = "video_summaries"
33 | 
34 |     id = Column(Integer, primary_key=True, index=True)
35 |     video_id = Column(Integer, ForeignKey("videos.id"))
36 |     summary = Column(String)
37 |     created_at = Column(DateTime, default=datetime.now)
38 |     status = Column(String, default="pending")
39 | 
40 |     video = relationship("Video", back_populates="summary")
41 | 
42 | class VideoSponsor(Base):
43 |     __tablename__ = "video_sponsors"
44 | 
45 |     id = Column(Integer, primary_key=True, index=True)
46 |     video_id = Column(Integer, ForeignKey("videos.id"))
47 |     is_sponsored = Column(Boolean, default=False)
48 |     brands_mentioned = Column(JSON)
49 |     created_at = Column(DateTime, default=datetime.now)
50 |     status = Column(String, default="pending")
51 | 
52 |     video = relationship("Video", back_populates="sponsor")
53 | 
54 | class Channel(Base):
55 |     __tablename__ = "channels"
56 | 
57 |     id = Column(Integer, primary_key=True, index=True)
58 |     url = Column(String, index=True)
59 |     created_at = Column(DateTime, default=datetime.now)
60 |     status = Column(String, default="pending")
61 |     channel_metadata = Column(JSON, nullable=True)  # Store channel metadata when scraping is complete
62 | 


--------------------------------------------------------------------------------
/app/parse_sponsor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from openai import OpenAI
  3 | from dotenv import load_dotenv
  4 | 
  5 | load_dotenv()
  6 | 
  7 | # Initialize OpenAI client
  8 | client = OpenAI(api_key="<enter yours>")
  9 | 
 10 | # Common sponsor-related keywords and phrases
 11 | sponsor_keywords = [
 12 |     "sponsor", "sponsored", "partnership", "partner", "brought to you by",
 13 |     "thanks to", "promotion", "promotional", "affiliate", "discount code",
 14 |     "promo code", "special offer", "check out", "sponsored by"
 15 | ]
 16 | 
 17 | def extract_context(text: str, keyword: str, window_size: int = 100) -> str:
 18 |     """
 19 |     Extract context around a keyword with specified window size before and after.
 20 |     """
 21 |     text = text.lower()
 22 |     keyword_pos = text.find(keyword.lower())
 23 |     
 24 |     if keyword_pos == -1:
 25 |         return ""
 26 |     
 27 |     # Find the start and end positions for the context window
 28 |     start = max(0, keyword_pos - window_size)
 29 |     end = min(len(text), keyword_pos + len(keyword) + window_size)
 30 |     
 31 |     # Extract the context and clean it
 32 |     context = text[start:end].strip()
 33 |     return ' '.join(context.split())  # Normalize whitespace
 34 | 
 35 | def find_sponsor(text: str, keywords: list[str]) -> list[tuple[str, str]]:
 36 |     """
 37 |     Find potential sponsor mentions in text using keyword matching.
 38 |     Returns list of tuples containing (context, keyword).
 39 |     """
 40 |     if not text:
 41 |         return []
 42 |     
 43 |     found_contexts = []
 44 |     
 45 |     for keyword in keywords:
 46 |         # Get all occurrences of the keyword
 47 |         text_lower = text.lower()
 48 |         start = 0
 49 |         while True:
 50 |             pos = text_lower.find(keyword, start)
 51 |             if pos == -1:
 52 |                 break
 53 |                 
 54 |             # Extract context around this occurrence
 55 |             context = extract_context(text[max(0, pos - 100):min(len(text), pos + 100 + len(keyword))], keyword)
 56 |             if context:
 57 |                 found_contexts.append((context, keyword))
 58 |             
 59 |             start = pos + len(keyword)
 60 |     
 61 |     return found_contexts
 62 | 
 63 | def extract_sponsor_name(context: str) -> str:
 64 |     """
 65 |     Use GPT to extract the sponsor name from the context.
 66 |     """
 67 |     prompt = f"""Given this text, extract ONLY the company name that is sponsoring or advertising.
 68 |     If there is no clear sponsor, respond with 'None'. If there is a sponsor but you can't determine the exact name, respond with 'Unknown'.
 69 |     Respond with just the company name, no other text.
 70 |     
 71 |     Text: {context}"""
 72 | 
 73 |     response = client.chat.completions.create(
 74 |         model="gpt-4o-mini-2024-07-18",
 75 |         messages=[
 76 |             {"role": "system", "content": "You are a sponsor detection system. Extract only the company name, nothing else."},
 77 |             {"role": "user", "content": prompt}
 78 |         ],
 79 |         temperature=0.1
 80 |     )
 81 |     
 82 |     sponsor = response.choices[0].message.content.strip()
 83 |     
 84 |     # Clean up common formatting issues
 85 |     if sponsor.lower() in ['none', 'no sponsor', 'no clear sponsor']:
 86 |         return None
 87 |     if sponsor.lower() in ['unknown', "can't determine", 'unclear']:
 88 |         return None
 89 |         
 90 |     return sponsor
 91 | 
 92 | def parse_sponsors(description: str, transcript: str) -> dict:
 93 |     """
 94 |     Parse description and transcript to find potential sponsors.
 95 |     Returns a dictionary with is_sponsored flag and list of sponsor brands with context.
 96 |     """
 97 |     all_text = f"{description}\n{transcript}"
 98 |     sponsor_contexts = find_sponsor(all_text, sponsor_keywords)
 99 |     
100 |     if not sponsor_contexts:
101 |         return {
102 |             "is_sponsored": False,
103 |             "brands": []
104 |         }
105 |     
106 |     # Extract sponsor names for contexts
107 |     brands = []
108 |     seen_brands = set()
109 |     
110 |     for context, _ in sponsor_contexts:
111 |         sponsor_name = extract_sponsor_name(context)
112 |         if sponsor_name and sponsor_name.lower() not in seen_brands:
113 |             seen_brands.add(sponsor_name.lower())
114 |             brands.append({
115 |                 "name": sponsor_name,
116 |                 "context": context
117 |             })
118 |     
119 |     return {
120 |         "is_sponsored": bool(brands),
121 |         "brands": brands
122 |     }
123 | 


--------------------------------------------------------------------------------
/app/process_pool_manager.py:
--------------------------------------------------------------------------------
 1 | from multiprocessing import Pool, Manager
 2 | from .sponsor_worker import process_sponsor_task
 3 | 
 4 | # Default to using half of available CPU cores, with a minimum of 2
 5 | import multiprocessing
 6 | DEFAULT_POOL_SIZE = max(2, multiprocessing.cpu_count() // 2)
 7 | 
 8 | class ProcessPoolManager:
 9 |     def __init__(self, pool_size: int = DEFAULT_POOL_SIZE):
10 |         self.pool_size = pool_size
11 |         self.manager = Manager()
12 |         self.task_queue = self.manager.Queue()
13 |         self.active_tasks = self.manager.Value('i', 0)
14 |         self.pool = Pool(processes=pool_size)
15 |         
16 |     def add_task(self, video_id: int, transcript: str, description: str, db_url: str):
17 |         """Add a new task to the queue"""
18 |         task_data = {
19 |             'video_id': video_id,
20 |             'transcript': transcript,
21 |             'description': description,
22 |             'db_url': db_url
23 |         }
24 |         self.task_queue.put(task_data)
25 |         self._process_queue()
26 |     
27 |     def _process_queue(self):
28 |         """Process tasks from the queue if there's capacity in the pool"""
29 |         while self.active_tasks.value < self.pool_size and not self.task_queue.empty():
30 |             try:
31 |                 task_data = self.task_queue.get_nowait()
32 |                 self.active_tasks.value += 1
33 |                 
34 |                 # Start the task in the pool
35 |                 self.pool.apply_async(
36 |                     process_sponsor_task,
37 |                     args=(
38 |                         task_data['video_id'],
39 |                         task_data['transcript'],
40 |                         task_data['description'],
41 |                         task_data['db_url']
42 |                     ),
43 |                     callback=self._task_complete,
44 |                     error_callback=self._task_error
45 |                 )
46 |             except Exception as e:
47 |                 print(f"Error starting task: {str(e)}")
48 |                 self.active_tasks.value -= 1
49 |     
50 |     def _task_complete(self, result):
51 |         """Callback when a task completes successfully"""
52 |         self.active_tasks.value -= 1
53 |         self._process_queue()
54 |     
55 |     def _task_error(self, error):
56 |         """Callback when a task fails"""
57 |         print(f"Task failed with error: {str(error)}")
58 |         self.active_tasks.value -= 1
59 |         self._process_queue()
60 |     
61 |     def shutdown(self):
62 |         """Shutdown the pool and wait for all tasks to complete"""
63 |         self.pool.close()
64 |         self.pool.join()
65 | 
66 | # Global instance of the pool manager
67 | _pool_manager = None
68 | 
69 | def get_pool_manager(pool_size: int = DEFAULT_POOL_SIZE) -> ProcessPoolManager:
70 |     """Get or create the global pool manager instance"""
71 |     global _pool_manager
72 |     if _pool_manager is None:
73 |         _pool_manager = ProcessPoolManager(pool_size)
74 |     return _pool_manager
75 | 


--------------------------------------------------------------------------------
/app/schemas.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, ConfigDict
 2 | from typing import List, Optional, Dict, Any, Union
 3 | from datetime import datetime
 4 | 
 5 | class VideoBase(BaseModel):
 6 |     url: str
 7 | 
 8 | class VideoCreate(VideoBase):
 9 |     pass
10 | 
11 | class MetadataBase(BaseModel):
12 |     metadata_json: Optional[Dict[str, Any]] = None
13 |     status: Optional[str] = None
14 |     creator: Optional[str] = None
15 | 
16 | class MetadataCreate(MetadataBase):
17 |     video_id: int
18 | 
19 | class Metadata(MetadataBase):
20 |     id: int
21 |     video_id: int
22 |     created_at: datetime
23 | 
24 |     model_config = ConfigDict(from_attributes=True)
25 | 
26 | class VideoSummaryBase(BaseModel):
27 |     summary: Optional[str] = None
28 |     status: Optional[str] = None
29 | 
30 | class VideoSummaryCreate(VideoSummaryBase):
31 |     video_id: int
32 | 
33 | class VideoSummary(VideoSummaryBase):
34 |     id: int
35 |     video_id: int
36 |     created_at: datetime
37 | 
38 |     model_config = ConfigDict(from_attributes=True)
39 | 
40 | class VideoSponsorBase(BaseModel):
41 |     is_sponsored: Optional[bool] = None
42 |     brands_mentioned: Optional[List[Dict[str, str]]] = None
43 |     status: Optional[str] = None
44 | 
45 | class VideoSponsorCreate(VideoSponsorBase):
46 |     video_id: int
47 | 
48 | class VideoSponsor(VideoSponsorBase):
49 |     id: int
50 |     video_id: int
51 |     created_at: datetime
52 | 
53 |     model_config = ConfigDict(from_attributes=True)
54 | 
55 | class Video(VideoBase):
56 |     id: int
57 |     status: Optional[str] = None
58 |     created_at: datetime
59 |     video_metadata: Optional[Metadata] = None
60 |     summary: Optional[VideoSummary] = None
61 |     sponsor: Optional[VideoSponsor] = None
62 | 
63 |     @property
64 |     def creator(self) -> Optional[str]:
65 |         return self.video_metadata.creator if self.video_metadata else None
66 | 
67 |     model_config = ConfigDict(from_attributes=True)
68 | 
69 | class VideoList(BaseModel):
70 |     items: List[Video]
71 |     total: int
72 |     page: int
73 |     size: int
74 | 
75 | class Channel(BaseModel):
76 |     id: int
77 |     url: str
78 |     status: Optional[str] = None
79 |     created_at: datetime
80 |     channel_metadata: Optional[Dict[str, Any]] = None
81 | 
82 |     model_config = ConfigDict(from_attributes=True)
83 | 
84 | class ChannelList(BaseModel):
85 |     items: List[Channel]
86 |     total: int
87 |     page: int
88 |     size: int
89 | 
90 | class ScrapeVideosRequest(BaseModel):
91 |     urls: List[str]
92 |     scrape_type: str = "video"
93 | 
94 | class ScrapeChannelRequest(BaseModel):
95 |     url: str


--------------------------------------------------------------------------------
/app/sponsor_worker.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import create_engine
 2 | from sqlalchemy.orm import sessionmaker
 3 | from .parse_sponsor import parse_sponsors
 4 | from .models import Video, VideoSponsor
 5 | from .database import Base
 6 | 
 7 | def process_sponsor_task(video_id: int, transcript: str, description: str, db_url: str):
 8 |     try:
 9 |         # Create a new database session for this process
10 |         engine = create_engine(db_url)
11 |         Base.metadata.create_all(bind=engine)
12 |         SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
13 |         db = SessionLocal()
14 | 
15 |         try:
16 |             # Get the video
17 |             video = db.query(Video).filter(Video.id == video_id).first()
18 |             if not video:
19 |                 print(f"Video {video_id} not found")
20 |                 return
21 | 
22 |             # Get or create VideoSponsor
23 |             sponsor = db.query(VideoSponsor).filter(VideoSponsor.video_id == video_id).first()
24 |             if not sponsor:
25 |                 sponsor = VideoSponsor(video_id=video_id)
26 |                 db.add(sponsor)
27 | 
28 |             # Update status to in_progress
29 |             sponsor.status = "in_progress"
30 |             db.commit()
31 | 
32 |             # Process sponsors
33 |             try:
34 |                 sponsor_data = parse_sponsors(description, transcript)
35 |                 if isinstance(sponsor_data, dict):
36 |                     sponsor.is_sponsored = sponsor_data.get("is_sponsored", False)
37 |                     sponsor.brands_mentioned = sponsor_data.get("brands", [])
38 |                 else:
39 |                     # Handle the case where parse_sponsors returns a different structure
40 |                     sponsor.is_sponsored = bool(sponsor_data)
41 |                     sponsor.brands_mentioned = sponsor_data if isinstance(sponsor_data, list) else []
42 |                 sponsor.status = "completed"
43 |             except Exception as e:
44 |                 print(f"Error processing sponsors for video {video_id}: {str(e)}")
45 |                 sponsor.status = "failed"
46 | 
47 |             db.commit()
48 | 
49 |         finally:
50 |             db.close()
51 | 
52 |     except Exception as e:
53 |         print(f"Worker process error for video {video_id}: {str(e)}")
54 | 


--------------------------------------------------------------------------------
/env.sample:
--------------------------------------------------------------------------------
1 | BD_API_KEY=
2 | OPENAI_API_KEY=


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 


--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # React + TypeScript + Vite
 2 | 
 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
 4 | 
 5 | Currently, two official plugins are available:
 6 | 
 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
 9 | 
10 | ## Expanding the ESLint configuration
11 | 
12 | If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
13 | 
14 | - Configure the top-level `parserOptions` property like this:
15 | 
16 | ```js
17 | export default tseslint.config({
18 |   languageOptions: {
19 |     // other options...
20 |     parserOptions: {
21 |       project: ['./tsconfig.node.json', './tsconfig.app.json'],
22 |       tsconfigRootDir: import.meta.dirname,
23 |     },
24 |   },
25 | })
26 | ```
27 | 
28 | - Replace `tseslint.configs.recommended` to `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
29 | - Optionally add `...tseslint.configs.stylisticTypeChecked`
30 | - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
31 | 
32 | ```js
33 | // eslint.config.js
34 | import react from 'eslint-plugin-react'
35 | 
36 | export default tseslint.config({
37 |   // Set the react version
38 |   settings: { react: { version: '18.3' } },
39 |   plugins: {
40 |     // Add the react plugin
41 |     react,
42 |   },
43 |   rules: {
44 |     // other rules...
45 |     // Enable its recommended rules
46 |     ...react.configs.recommended.rules,
47 |     ...react.configs['jsx-runtime'].rules,
48 |   },
49 | })
50 | ```
51 | 


--------------------------------------------------------------------------------
/frontend/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js'
 2 | import globals from 'globals'
 3 | import reactHooks from 'eslint-plugin-react-hooks'
 4 | import reactRefresh from 'eslint-plugin-react-refresh'
 5 | import tseslint from 'typescript-eslint'
 6 | 
 7 | export default tseslint.config(
 8 |   { ignores: ['dist'] },
 9 |   {
10 |     extends: [js.configs.recommended, ...tseslint.configs.recommended],
11 |     files: ['**/*.{ts,tsx}'],
12 |     languageOptions: {
13 |       ecmaVersion: 2020,
14 |       globals: globals.browser,
15 |     },
16 |     plugins: {
17 |       'react-hooks': reactHooks,
18 |       'react-refresh': reactRefresh,
19 |     },
20 |     rules: {
21 |       ...reactHooks.configs.recommended.rules,
22 |       'react-refresh/only-export-components': [
23 |         'warn',
24 |         { allowConstantExport: true },
25 |       ],
26 |     },
27 |   },
28 | )
29 | 


--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>Vite + React + TS</title>
 8 |   </head>
 9 |   <body>
10 |     <div id="root"></div>
11 |     <script type="module" src="/src/main.tsx"></script>
12 |   </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "tsc -b && vite build",
 9 |     "lint": "eslint .",
10 |     "preview": "vite preview"
11 |   },
12 |   "dependencies": {
13 |     "@emotion/react": "^11.14.0",
14 |     "@emotion/styled": "^11.14.0",
15 |     "@mui/icons-material": "^6.3.1",
16 |     "@mui/material": "^6.3.1",
17 |     "react": "^18.3.1",
18 |     "react-dom": "^18.3.1",
19 |     "react-router-dom": "^7.1.1"
20 |   },
21 |   "devDependencies": {
22 |     "@eslint/js": "^9.17.0",
23 |     "@types/react": "^18.3.18",
24 |     "@types/react-dom": "^18.3.5",
25 |     "@vitejs/plugin-react": "^4.3.4",
26 |     "eslint": "^9.17.0",
27 |     "eslint-plugin-react-hooks": "^5.0.0",
28 |     "eslint-plugin-react-refresh": "^0.4.16",
29 |     "globals": "^15.14.0",
30 |     "typescript": "~5.6.2",
31 |     "typescript-eslint": "^8.18.2",
32 |     "vite": "^6.0.5"
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/frontend/public/vite.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>


--------------------------------------------------------------------------------
/frontend/src/App.css:
--------------------------------------------------------------------------------
 1 | #root {
 2 |   max-width: 1280px;
 3 |   margin: 0 auto;
 4 |   padding: 2rem;
 5 |   text-align: center;
 6 | }
 7 | 
 8 | .logo {
 9 |   height: 6em;
10 |   padding: 1.5em;
11 |   will-change: filter;
12 |   transition: filter 300ms;
13 | }
14 | .logo:hover {
15 |   filter: drop-shadow(0 0 2em #646cffaa);
16 | }
17 | .logo.react:hover {
18 |   filter: drop-shadow(0 0 2em #61dafbaa);
19 | }
20 | 
21 | @keyframes logo-spin {
22 |   from {
23 |     transform: rotate(0deg);
24 |   }
25 |   to {
26 |     transform: rotate(360deg);
27 |   }
28 | }
29 | 
30 | @media (prefers-reduced-motion: no-preference) {
31 |   a:nth-of-type(2) .logo {
32 |     animation: logo-spin infinite 20s linear;
33 |   }
34 | }
35 | 
36 | .card {
37 |   padding: 2em;
38 | }
39 | 
40 | .read-the-docs {
41 |   color: #888;
42 | }
43 | 


--------------------------------------------------------------------------------
/frontend/src/App.tsx:
--------------------------------------------------------------------------------
 1 | import { BrowserRouter as Router, Routes, Route, Link, useNavigate } from 'react-router-dom';
 2 | import { Container, AppBar, Toolbar, Typography, Box, Button, CssBaseline } from '@mui/material';
 3 | import { VideoInput } from './pages/VideoInput';
 4 | import { VideoList } from './pages/VideoList';
 5 | import { VideoDetail } from './pages/VideoDetail';
 6 | 
 7 | // Wrapper component to handle navigation
 8 | const VideoInputWrapper = () => {
 9 |   const navigate = useNavigate();
10 | 
11 |   const handleSubmit = async (urls: string[], type: string) => {
12 |     try {
13 |       const response = await fetch('/api/scrape-videos/', {
14 |         method: 'POST',
15 |         headers: {
16 |           'Content-Type': 'application/json',
17 |         },
18 |         body: JSON.stringify({ urls, scrape_type: type }),
19 |       });
20 |       
21 |       if (!response.ok) {
22 |         throw new Error('Failed to submit videos');
23 |       }
24 | 
25 |       // Navigate to videos page after successful submission
26 |       navigate('/videos');
27 |     } catch (error) {
28 |       console.error('Error submitting videos:', error);
29 |       throw error;
30 |     }
31 |   };
32 | 
33 |   return <VideoInput onSubmit={handleSubmit} />;
34 | };
35 | 
36 | function App() {
37 |   return (
38 |     <Router>
39 |       <CssBaseline />
40 |       <Box sx={{ 
41 |         display: 'flex', 
42 |         flexDirection: 'column', 
43 |         minHeight: '100vh' 
44 |       }}>
45 |         <AppBar position="static">
46 |           <Toolbar>
47 |             <Typography variant="h6" component="div" sx={{ flexGrow: 1 }}>
48 |               YouTube Video Scraper
49 |             </Typography>
50 |             <Button color="inherit" component={Link} to="/">
51 |               Home
52 |             </Button>
53 |             <Button color="inherit" component={Link} to="/videos">
54 |               Videos
55 |             </Button>
56 |           </Toolbar>
57 |         </AppBar>
58 |         
59 |         <Box component="main" sx={{ 
60 |           flexGrow: 1, 
61 |           display: 'flex',
62 |           flexDirection: 'column',
63 |           alignItems: 'center',
64 |           width: '100%',
65 |           py: 4
66 |         }}>
67 |           <Container disableGutters maxWidth={false} sx={{ width: '100%', px: { xs: 2, sm: 3 } }}>
68 |             <Routes>
69 |               <Route path="/" element={<VideoInputWrapper />} />
70 |               <Route path="/videos" element={<VideoList />} />
71 |               <Route path="/video/:id" element={<VideoDetail />} />
72 |             </Routes>
73 |           </Container>
74 |         </Box>
75 |       </Box>
76 |     </Router>
77 |   );
78 | }
79 | 
80 | export default App;
81 | 


--------------------------------------------------------------------------------
/frontend/src/assets/react.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>


--------------------------------------------------------------------------------
/frontend/src/components/PendingChannels.tsx:
--------------------------------------------------------------------------------
 1 | import { useEffect, useState } from 'react';
 2 | import { Box, Typography, Paper, CircularProgress, List, ListItem, ListItemText } from '@mui/material';
 3 | 
 4 | interface Channel {
 5 |   id: number;
 6 |   url: string;
 7 |   status: string;
 8 |   created_at: string;
 9 | }
10 | 
11 | export const PendingChannels = () => {
12 |   const [channels, setChannels] = useState<Channel[]>([]);
13 |   const [isLoading, setIsLoading] = useState(true);
14 |   const [error, setError] = useState('');
15 | 
16 |   const fetchPendingChannels = async () => {
17 |     try {
18 |       const response = await fetch('/api/pending-channels/');
19 |       if (!response.ok) {
20 |         throw new Error('Failed to fetch pending channels');
21 |       }
22 |       const data = await response.json();
23 |       setChannels(data);
24 |     } catch (err) {
25 |       setError('Failed to load pending channels');
26 |       console.error('Error fetching pending channels:', err);
27 |     } finally {
28 |       setIsLoading(false);
29 |     }
30 |   };
31 | 
32 |   useEffect(() => {
33 |     fetchPendingChannels();
34 |     // Poll for updates every 10 seconds
35 |     const interval = setInterval(fetchPendingChannels, 10000);
36 |     return () => clearInterval(interval);
37 |   }, []);
38 | 
39 |   if (isLoading) {
40 |     return (
41 |       <Box sx={{ display: 'flex', justifyContent: 'center', mt: 2 }}>
42 |         <CircularProgress />
43 |       </Box>
44 |     );
45 |   }
46 | 
47 |   if (error) {
48 |     return (
49 |       <Typography color="error" sx={{ mt: 2 }}>
50 |         {error}
51 |       </Typography>
52 |     );
53 |   }
54 | 
55 |   if (channels.length === 0) {
56 |     return null;
57 |   }
58 | 
59 |   return (
60 |     <Paper elevation={2} sx={{ mt: 3, p: 2 }}>
61 |       <Typography variant="h6" gutterBottom>
62 |         Pending Channels
63 |       </Typography>
64 |       <List dense>
65 |         {channels.map((channel) => (
66 |           <ListItem key={channel.id}>
67 |             <ListItemText
68 |               primary={new URL(channel.url).pathname}
69 |               secondary={`Added ${new Date(channel.created_at).toLocaleString()}`}
70 |             />
71 |             <CircularProgress size={20} />
72 |           </ListItem>
73 |         ))}
74 |       </List>
75 |     </Paper>
76 |   );
77 | };
78 | 


--------------------------------------------------------------------------------
/frontend/src/index.css:
--------------------------------------------------------------------------------
 1 | * {
 2 |   margin: 0;
 3 |   padding: 0;
 4 |   box-sizing: border-box;
 5 | }
 6 | 
 7 | :root {
 8 |   font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
 9 |   line-height: 1.5;
10 |   font-weight: 400;
11 |   color-scheme: light dark;
12 |   color: #213547;
13 |   background-color: #ffffff;
14 |   font-synthesis: none;
15 |   text-rendering: optimizeLegibility;
16 |   -webkit-font-smoothing: antialiased;
17 |   -moz-osx-font-smoothing: grayscale;
18 | }
19 | 
20 | html, body {
21 |   margin: 0;
22 |   padding: 0;
23 |   width: 100%;
24 |   height: 100%;
25 |   overflow-x: hidden;
26 | }
27 | 
28 | body {
29 |   display: flex;
30 |   flex-direction: column;
31 |   place-items: center;
32 |   min-width: 320px;
33 | }
34 | 
35 | #root {
36 |   display: flex;
37 |   flex-direction: column;
38 |   min-height: 100vh;
39 |   width: 100%;
40 | }
41 | 
42 | a {
43 |   font-weight: 500;
44 |   color: #646cff;
45 |   text-decoration: inherit;
46 | }
47 | a:hover {
48 |   color: #535bf2;
49 | }
50 | 
51 | h1 {
52 |   font-size: 3.2em;
53 |   line-height: 1.1;
54 | }
55 | 
56 | button {
57 |   border-radius: 8px;
58 |   border: 1px solid transparent;
59 |   padding: 0.6em 1.2em;
60 |   font-size: 1em;
61 |   font-weight: 500;
62 |   font-family: inherit;
63 |   background-color: #1a1a1a;
64 |   cursor: pointer;
65 |   transition: border-color 0.25s;
66 | }
67 | button:hover {
68 |   border-color: #646cff;
69 | }
70 | button:focus,
71 | button:focus-visible {
72 |   outline: 4px auto -webkit-focus-ring-color;
73 | }
74 | 
75 | @media (prefers-color-scheme: dark) {
76 |   :root {
77 |     color: rgba(255, 255, 255, 0.87);
78 |     background-color: #242424;
79 |   }
80 |   a:hover {
81 |     color: #747bff;
82 |   }
83 |   button {
84 |     background-color: #f9f9f9;
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from 'react'
 2 | import { createRoot } from 'react-dom/client'
 3 | import './index.css'
 4 | import App from './App.tsx'
 5 | 
 6 | createRoot(document.getElementById('root')!).render(
 7 |   <StrictMode>
 8 |     <App />
 9 |   </StrictMode>,
10 | )
11 | 


--------------------------------------------------------------------------------
/frontend/src/pages/VideoDetail.tsx:
--------------------------------------------------------------------------------
  1 | import { useEffect, useState } from 'react';
  2 | import { useParams } from 'react-router-dom';
  3 | import { 
  4 |   Paper, 
  5 |   Typography, 
  6 |   Box, 
  7 |   Grid, 
  8 |   Chip,
  9 |   CircularProgress,
 10 |   Card,
 11 |   CardContent,
 12 |   Link,
 13 |   Accordion,
 14 |   AccordionSummary,
 15 |   AccordionDetails,
 16 |   Tooltip
 17 | } from '@mui/material';
 18 | import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
 19 | import MonetizationOnIcon from '@mui/icons-material/MonetizationOn';
 20 | 
 21 | interface Video {
 22 |   id: number;
 23 |   url: string;
 24 |   status: string;
 25 |   created_at: string;
 26 |   sponsor?: {
 27 |     status: string;
 28 |     is_sponsored: boolean;
 29 |     brands_mentioned: Array<{
 30 |       name: string;
 31 |       context: string;
 32 |     }>;
 33 |   };
 34 |   video_metadata?: {
 35 |     metadata_json?: {
 36 |       title?: string;
 37 |       description?: string;
 38 |       views?: number;
 39 |       likes?: number;
 40 |       date_posted?: string;
 41 |       youtuber?: string;
 42 |       channel_url?: string;
 43 |       subscribers?: number;
 44 |       video_length?: number;
 45 |       preview_image?: string;
 46 |       transcript?: string;
 47 |     };
 48 |   };
 49 | }
 50 | 
 51 | export const VideoDetail = () => {
 52 |   const { id } = useParams<{ id: string }>();
 53 |   const [video, setVideo] = useState<Video | null>(null);
 54 |   const [isLoading, setIsLoading] = useState(true);
 55 |   const [error, setError] = useState('');
 56 | 
 57 |   useEffect(() => {
 58 |     const fetchVideo = async () => {
 59 |       try {
 60 |         setIsLoading(true);
 61 |         const response = await fetch(`/api/videos/${id}`);
 62 |         if (!response.ok) throw new Error('Failed to fetch video');
 63 |         const data = await response.json();
 64 |         setVideo(data);
 65 |       } catch (err) {
 66 |         setError('Failed to load video details');
 67 |         console.error(err);
 68 |       } finally {
 69 |         setIsLoading(false);
 70 |       }
 71 |     };
 72 | 
 73 |     fetchVideo();
 74 |   }, [id]);
 75 | 
 76 |   if (isLoading) {
 77 |     return (
 78 |       <Box display="flex" justifyContent="center" alignItems="center" height="80vh">
 79 |         <CircularProgress />
 80 |       </Box>
 81 |     );
 82 |   }
 83 | 
 84 |   if (error || !video) {
 85 |     return (
 86 |       <Box display="flex" justifyContent="center" alignItems="center" height="80vh">
 87 |         <Typography color="error">{error || 'Video not found'}</Typography>
 88 |       </Box>
 89 |     );
 90 |   }
 91 | 
 92 |   const metadata = video.video_metadata?.metadata_json || {};
 93 | 
 94 |   const formatDuration = (seconds?: number): string => {
 95 |     if (!seconds) return '00:00:00';
 96 |     const hours = Math.floor(seconds / 3600);
 97 |     const minutes = Math.floor((seconds % 3600) / 60);
 98 |     const remainingSeconds = seconds % 60;
 99 |     return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${remainingSeconds.toString().padStart(2, '0')}`;
100 |   };
101 | 
102 |   return (
103 |     <Box sx={{ width: '100%' }}>
104 |       <Paper elevation={3} sx={{ width: '100%', p: 3 }}>
105 |         <Grid container spacing={3} sx={{ width: '100%' }}>
106 |           <Grid item xs={12}>
107 |             <Typography variant="h4" gutterBottom>
108 |               {metadata.title || 'Untitled Video'}
109 |             </Typography>
110 |             <Box sx={{ display: 'flex', gap: 2, alignItems: 'center', mb: 2, flexWrap: 'wrap' }}>
111 |               <Chip 
112 |                 label={formatDuration(metadata.video_length)} 
113 |                 variant="outlined"
114 |               />
115 |               <Chip 
116 |                 label={`${metadata.views?.toLocaleString() || 0} views`}
117 |                 variant="outlined"
118 |               />
119 |               <Chip 
120 |                 label={`${metadata.likes?.toLocaleString() || 0} likes`}
121 |                 variant="outlined"
122 |               />
123 |               {video.sponsor?.status === 'completed' && video.sponsor.is_sponsored && (
124 |                 <Tooltip title={video.sponsor.brands_mentioned.map(b => b.name).join(', ')}>
125 |                   <Chip 
126 |                     label="Sponsored" 
127 |                     color="primary"
128 |                     icon={<MonetizationOnIcon />}
129 |                   />
130 |                 </Tooltip>
131 |               )}
132 |             </Box>
133 |           </Grid>
134 | 
135 |           <Grid item xs={12} md={6}>
136 |             {metadata.preview_image && (
137 |               <Box sx={{ 
138 |                 width: '100%', 
139 |                 height: { xs: '200px', sm: '300px', md: '360px' },
140 |                 position: 'relative',
141 |                 overflow: 'hidden',
142 |                 borderRadius: 1
143 |               }}>
144 |                 <img 
145 |                   src={metadata.preview_image} 
146 |                   alt={metadata.title} 
147 |                   style={{ 
148 |                     width: '100%', 
149 |                     height: '100%', 
150 |                     objectFit: 'cover'
151 |                   }}
152 |                 />
153 |               </Box>
154 |             )}
155 |           </Grid>
156 | 
157 |           <Grid item xs={12} md={metadata.preview_image ? 6 : 12}>
158 |             <Card variant="outlined" sx={{ height: '100%' }}>
159 |               <CardContent>
160 |                 <Typography variant="h6" gutterBottom>
161 |                   Video Statistics
162 |                 </Typography>
163 |                 <Grid container spacing={2}>
164 |                   <Grid item xs={6}>
165 |                     <Typography color="textSecondary">Views</Typography>
166 |                     <Typography variant="h6">{metadata.views?.toLocaleString() || 'N/A'}</Typography>
167 |                   </Grid>
168 |                   <Grid item xs={6}>
169 |                     <Typography color="textSecondary">Likes</Typography>
170 |                     <Typography variant="h6">{metadata.likes?.toLocaleString() || 'N/A'}</Typography>
171 |                   </Grid>
172 |                   <Grid item xs={6}>
173 |                     <Typography color="textSecondary">Duration</Typography>
174 |                     <Typography variant="h6">{formatDuration(metadata.video_length)}</Typography>
175 |                   </Grid>
176 |                   <Grid item xs={6}>
177 |                     <Typography color="textSecondary">Posted</Typography>
178 |                     <Typography variant="h6">
179 |                       {metadata.date_posted 
180 |                         ? new Date(metadata.date_posted).toLocaleDateString()
181 |                         : 'N/A'
182 |                       }
183 |                     </Typography>
184 |                   </Grid>
185 |                 </Grid>
186 |               </CardContent>
187 |             </Card>
188 |           </Grid>
189 | 
190 |           <Grid item xs={12}>
191 |             <Card variant="outlined" sx={{ width: '100%' }}>
192 |               <CardContent>
193 |                 <Typography variant="h6" gutterBottom>
194 |                   Channel Information
195 |                 </Typography>
196 |                 <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
197 |                   <Typography>
198 |                     <strong>Channel:</strong>{' '}
199 |                     {metadata.channel_url ? (
200 |                       <Link href={metadata.channel_url} target="_blank" rel="noopener noreferrer">
201 |                         {metadata.youtuber || 'Unknown'}
202 |                       </Link>
203 |                     ) : (
204 |                       metadata.youtuber || 'Unknown'
205 |                     )}
206 |                   </Typography>
207 |                   <Typography>
208 |                     <strong>Subscribers:</strong>{' '}
209 |                     {metadata.subscribers?.toLocaleString() || 'N/A'}
210 |                   </Typography>
211 |                 </Box>
212 |               </CardContent>
213 |             </Card>
214 |           </Grid>
215 | 
216 |           {video.sponsor?.status === 'completed' && video.sponsor.is_sponsored && (
217 |             <Grid item xs={12}>
218 |               <Card variant="outlined" sx={{ width: '100%' }}>
219 |                 <CardContent>
220 |                   <Typography variant="h6" gutterBottom sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
221 |                     <MonetizationOnIcon color="primary" />
222 |                     Sponsor Information
223 |                   </Typography>
224 |                   <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
225 |                     {video.sponsor.brands_mentioned.map((brand, index) => (
226 |                       <Box key={index}>
227 |                         <Typography variant="subtitle1" color="primary" gutterBottom>
228 |                           {brand.name}
229 |                         </Typography>
230 |                         <Typography variant="body2" color="text.secondary">
231 |                           {brand.context}
232 |                         </Typography>
233 |                       </Box>
234 |                     ))}
235 |                   </Box>
236 |                 </CardContent>
237 |               </Card>
238 |             </Grid>
239 |           )}
240 | 
241 |           {metadata.description && (
242 |             <Grid item xs={12}>
243 |               <Accordion defaultExpanded={false} sx={{ width: '100%' }}>
244 |                 <AccordionSummary expandIcon={<ExpandMoreIcon />}>
245 |                   <Typography variant="h6">Description</Typography>
246 |                 </AccordionSummary>
247 |                 <AccordionDetails>
248 |                   <Typography
249 |                     component="pre"
250 |                     sx={{
251 |                       whiteSpace: 'pre-wrap',
252 |                       wordBreak: 'break-word',
253 |                       fontFamily: 'inherit'
254 |                     }}
255 |                   >
256 |                     {metadata.description}
257 |                   </Typography>
258 |                 </AccordionDetails>
259 |               </Accordion>
260 |             </Grid>
261 |           )}
262 | 
263 |           {metadata.transcript && (
264 |             <Grid item xs={12}>
265 |               <Accordion defaultExpanded={false} sx={{ width: '100%' }}>
266 |                 <AccordionSummary expandIcon={<ExpandMoreIcon />}>
267 |                   <Typography variant="h6">Transcript</Typography>
268 |                 </AccordionSummary>
269 |                 <AccordionDetails>
270 |                   <Typography
271 |                     component="pre"
272 |                     sx={{
273 |                       whiteSpace: 'pre-wrap',
274 |                       wordBreak: 'break-word',
275 |                       fontFamily: 'inherit'
276 |                     }}
277 |                   >
278 |                     {metadata.transcript}
279 |                   </Typography>
280 |                 </AccordionDetails>
281 |               </Accordion>
282 |             </Grid>
283 |           )}
284 |         </Grid>
285 |       </Paper>
286 |     </Box>
287 |   );
288 | };
289 | 


--------------------------------------------------------------------------------
/frontend/src/pages/VideoInput.tsx:
--------------------------------------------------------------------------------
  1 | import { useState } from 'react';
  2 | import { TextField, Button, Box, Typography, Paper, Alert, Tabs, Tab } from '@mui/material';
  3 | import { PendingChannels } from '../components/PendingChannels';
  4 | 
  5 | interface VideoInputProps {
  6 |   onSubmit: (urls: string[], type: string) => Promise<void>;
  7 | }
  8 | 
  9 | interface TabPanelProps {
 10 |   children?: React.ReactNode;
 11 |   index: number;
 12 |   value: number;
 13 | }
 14 | 
 15 | function TabPanel(props: TabPanelProps) {
 16 |   const { children, value, index, ...other } = props;
 17 | 
 18 |   return (
 19 |     <div
 20 |       role="tabpanel"
 21 |       hidden={value !== index}
 22 |       id={`simple-tabpanel-${index}`}
 23 |       aria-labelledby={`simple-tab-${index}`}
 24 |       {...other}
 25 |     >
 26 |       {value === index && (
 27 |         <Box sx={{ pt: 3 }}>
 28 |           {children}
 29 |         </Box>
 30 |       )}
 31 |     </div>
 32 |   );
 33 | }
 34 | 
 35 | export const VideoInput = ({ onSubmit }: VideoInputProps) => {
 36 |   const [urls, setUrls] = useState('');
 37 |   const [channelUrl, setChannelUrl] = useState('');
 38 |   const [error, setError] = useState('');
 39 |   const [isLoading, setIsLoading] = useState(false);
 40 |   const [tabValue, setTabValue] = useState(0);
 41 | 
 42 |   const handleTabChange = (_: React.SyntheticEvent, newValue: number) => {
 43 |     setTabValue(newValue);
 44 |     setError('');
 45 |   };
 46 | 
 47 |   const handleVideoSubmit = async (e: React.FormEvent) => {
 48 |     e.preventDefault();
 49 |     setError('');
 50 |     
 51 |     // Split URLs by newline and filter out empty lines
 52 |     const urlList = urls.split('\n').filter(url => url.trim());
 53 |     
 54 |     if (urlList.length === 0) {
 55 |       setError('Please enter at least one URL');
 56 |       return;
 57 |     }
 58 | 
 59 |     // Validate URLs
 60 |     const invalidUrls = urlList.filter(url => !url.includes('youtube.com/'));
 61 |     if (invalidUrls.length > 0) {
 62 |       setError('Please enter valid YouTube URLs');
 63 |       return;
 64 |     }
 65 | 
 66 |     setIsLoading(true);
 67 |     try {
 68 |       await onSubmit(urlList, "video");
 69 |       setUrls(''); // Clear input on success
 70 |     } catch (err) {
 71 |       setError('Failed to submit videos. Please try again.');
 72 |     } finally {
 73 |       setIsLoading(false);
 74 |     }
 75 |   };
 76 | 
 77 |   const handleChannelSubmit = async (e: React.FormEvent) => {
 78 |     e.preventDefault();
 79 |     setError('');
 80 | 
 81 |     if (!channelUrl.trim()) {
 82 |       setError('Please enter a channel URL');
 83 |       return;
 84 |     }
 85 | 
 86 |     if (!channelUrl.includes('youtube.com/')) {
 87 |       setError('Please enter a valid YouTube channel URL');
 88 |       return;
 89 |     }
 90 | 
 91 |     setIsLoading(true);
 92 |     setIsLoading(true);
 93 |     try {
 94 |       await onSubmit([channelUrl], "channel");
 95 |       setUrls(''); // Clear input on success
 96 |     } catch (err) {
 97 |       setError('Failed to submit channel. Please try again.');
 98 |     } finally {
 99 |       setIsLoading(false);
100 |     }
101 |   };
102 | 
103 |   return (
104 |     <Paper elevation={3} sx={{ p: 3, maxWidth: 600, mx: 'auto', mt: 4 }}>
105 |       <Typography variant="h5" gutterBottom>
106 |         Submit YouTube Content
107 |       </Typography>
108 |       
109 |       <Tabs value={tabValue} onChange={handleTabChange} sx={{ borderBottom: 1, borderColor: 'divider' }}>
110 |         <Tab label="Videos" />
111 |         <Tab label="Channel" />
112 |       </Tabs>
113 | 
114 |       <TabPanel value={tabValue} index={0}>
115 |         <form onSubmit={handleVideoSubmit}>
116 |           <TextField
117 |             multiline
118 |             rows={4}
119 |             fullWidth
120 |             value={urls}
121 |             onChange={(e) => setUrls(e.target.value)}
122 |             placeholder="Enter YouTube URLs (one per line)"
123 |             variant="outlined"
124 |             disabled={isLoading}
125 |             sx={{ mb: 2 }}
126 |           />
127 |           {error && (
128 |             <Alert severity="error" sx={{ mb: 2 }}>
129 |               {error}
130 |             </Alert>
131 |           )}
132 |           <Box sx={{ display: 'flex', justifyContent: 'flex-end' }}>
133 |             <Button
134 |               type="submit"
135 |               variant="contained"
136 |               color="primary"
137 |               disabled={isLoading}
138 |             >
139 |               {isLoading ? 'Processing...' : 'Submit Videos'}
140 |             </Button>
141 |           </Box>
142 |         </form>
143 |       </TabPanel>
144 | 
145 |       <TabPanel value={tabValue} index={1}>
146 |         <form onSubmit={handleChannelSubmit}>
147 |           <TextField
148 |             fullWidth
149 |             value={channelUrl}
150 |             onChange={(e) => setChannelUrl(e.target.value)}
151 |             placeholder="Enter YouTube channel URL"
152 |             variant="outlined"
153 |             disabled={isLoading}
154 |             sx={{ mb: 2 }}
155 |           />
156 |           {error && (
157 |             <Alert severity="error" sx={{ mb: 2 }}>
158 |               {error}
159 |             </Alert>
160 |           )}
161 |           <Box sx={{ display: 'flex', justifyContent: 'flex-end' }}>
162 |             <Button
163 |               type="submit"
164 |               variant="contained"
165 |               color="primary"
166 |               disabled={isLoading}
167 |             >
168 |               {isLoading ? 'Processing...' : 'Submit Channel'}
169 |             </Button>
170 |           </Box>
171 |         </form>
172 |         <PendingChannels />
173 |       </TabPanel>
174 |     </Paper>
175 |   );
176 | };
177 | 


--------------------------------------------------------------------------------
/frontend/src/pages/VideoList.tsx:
--------------------------------------------------------------------------------
  1 | import { useEffect, useState } from 'react';
  2 | import { 
  3 |   Table, 
  4 |   TableBody, 
  5 |   TableCell, 
  6 |   TableContainer, 
  7 |   TableHead, 
  8 |   TableRow, 
  9 |   Paper,
 10 |   TablePagination,
 11 |   Typography,
 12 |   Chip,
 13 |   Link,
 14 |   TableSortLabel,
 15 |   Box,
 16 |   IconButton,
 17 |   Tooltip,
 18 |   CircularProgress,
 19 |   FormControl,
 20 |   InputLabel,
 21 |   Select,
 22 |   MenuItem,
 23 |   TextField,
 24 |   Stack
 25 | } from '@mui/material';
 26 | import { Link as RouterLink } from 'react-router-dom';
 27 | import ContentCopyIcon from '@mui/icons-material/ContentCopy';
 28 | 
 29 | interface Video {
 30 |   id: number;
 31 |   url: string;
 32 |   creator: string;
 33 |   status: string;
 34 |   created_at: string;
 35 |   sponsor?: {
 36 |     status: string;
 37 |     is_sponsored: boolean;
 38 |     brands_mentioned: Array<{
 39 |       name: string;
 40 |       context: string;
 41 |     }>;
 42 |   };
 43 |   video_metadata?: {
 44 |     metadata_json?: {
 45 |       title?: string;
 46 |       views?: number;
 47 |       likes?: number;
 48 |       date_posted?: string;
 49 |       youtuber?: string
 50 |     };
 51 |     creator?: string
 52 |   };
 53 | }
 54 | 
 55 | interface VideoListResponse {
 56 |   items: Video[];
 57 |   total: number;
 58 |   page: number;
 59 |   size: number;
 60 | }
 61 | 
 62 | type Order = 'asc' | 'desc';
 63 | 
 64 | interface HeadCell {
 65 |   id: keyof Video | 'views' | 'date_posted' | 'title' | 'sponsor' | 'sponsor_names' | 'creator';
 66 |   label: string;
 67 |   numeric: boolean;
 68 |   sortable: boolean;
 69 | }
 70 | 
 71 | const headCells: HeadCell[] = [
 72 |   { id: 'id', label: 'ID', numeric: true, sortable: true },
 73 |   { id: 'url', label: 'URL', numeric: false, sortable: false },
 74 |   { id: 'title', label: 'Title', numeric: false, sortable: true },
 75 |   { id: 'views', label: 'Views', numeric: true, sortable: true },
 76 |   { id: 'date_posted', label: 'Date Posted', numeric: false, sortable: true },
 77 |   { id: 'status', label: 'Status', numeric: false, sortable: false },
 78 |   { id: 'sponsor', label: 'Sponsor Status', numeric: false, sortable: false },
 79 |   { id: 'sponsor_names', label: 'Sponsors', numeric: false, sortable: false },
 80 |   { id: 'creator', label: 'Creator', numeric: false, sortable: true },
 81 | ];
 82 | 
 83 | function useDebounce<T>(value: T, delay: number): [T] {
 84 |   const [debouncedValue, setDebouncedValue] = useState<T>(value);
 85 | 
 86 |   useEffect(() => {
 87 |     const handler = setTimeout(() => {
 88 |       setDebouncedValue(value);
 89 |     }, delay);
 90 | 
 91 |     return () => {
 92 |       clearTimeout(handler);
 93 |     };
 94 |   }, [value, delay]);
 95 | 
 96 |   return [debouncedValue];
 97 | }
 98 | 
 99 | export const VideoList = () => {
100 |   const [videos, setVideos] = useState<Video[]>([]);
101 |   const [page, setPage] = useState(0);
102 |   const [rowsPerPage, setRowsPerPage] = useState(10);
103 |   const [total, setTotal] = useState(0);
104 |   const [isLoading, setIsLoading] = useState(true);
105 |   const [error, setError] = useState('');
106 |   const [orderBy, setOrderBy] = useState<string>('id');
107 |   const [order, setOrder] = useState<Order>('desc');
108 |   const [sponsorFilter, setSponsorFilter] = useState<string>('');
109 |   const [sponsorSearch, setSponsorSearch] = useState<string>('');
110 |   const [creatorFilter, setCreatorFilter] = useState<string>('');
111 |   const [debouncedSponsorSearch] = useDebounce(sponsorSearch, 500);
112 |   const [debouncedCreatorFilter] = useDebounce(creatorFilter, 500);
113 | 
114 |   const fetchVideos = async () => {
115 |     try {
116 |       setIsLoading(true);
117 |       const queryParams = new URLSearchParams({
118 |         page: (page + 1).toString(),
119 |         size: rowsPerPage.toString(),
120 |         sort: orderBy,
121 |         order: order
122 |       });
123 | 
124 |       if (sponsorFilter) {
125 |         queryParams.append('sponsor_filter', sponsorFilter);
126 |       }
127 | 
128 |       if (debouncedSponsorSearch) {
129 |         queryParams.append('sponsor_name', debouncedSponsorSearch);
130 |       }
131 | 
132 |       if (debouncedCreatorFilter) {
133 |         queryParams.append('creator', debouncedCreatorFilter);
134 |       }
135 | 
136 |       const response = await fetch(`/api/videos/?${queryParams}`);
137 |       if (!response.ok) throw new Error('Failed to fetch videos');
138 |       
139 |       const data: VideoListResponse = await response.json();
140 |       setVideos(data.items);
141 |       setTotal(data.total);
142 |     } catch (err) {
143 |       setError('Failed to load videos');
144 |       console.error(err);
145 |     } finally {
146 |       setIsLoading(false);
147 |     }
148 |   };
149 | 
150 |   useEffect(() => {
151 |     fetchVideos();
152 |   }, [page, rowsPerPage, orderBy, order, debouncedSponsorSearch, sponsorFilter, debouncedCreatorFilter]);
153 | 
154 |   const handleChangePage = (_: unknown, newPage: number) => {
155 |     setPage(newPage);
156 |   };
157 | 
158 |   const handleChangeRowsPerPage = (event: React.ChangeEvent<HTMLInputElement>) => {
159 |     setRowsPerPage(parseInt(event.target.value, 10));
160 |     setPage(0);
161 |   };
162 | 
163 |   const handleRequestSort = (property: string) => {
164 |     const isAsc = orderBy === property && order === 'asc';
165 |     setOrder(isAsc ? 'desc' : 'asc');
166 |     setOrderBy(property);
167 |   };
168 | 
169 |   const getStatusColor = (status: string) => {
170 |     switch (status.toLowerCase()) {
171 |       case 'completed':
172 |         return 'success';
173 |       case 'in_progress':
174 |         return 'warning';
175 |       case 'failed':
176 |         return 'error';
177 |       default:
178 |         return 'default';
179 |     }
180 |   };
181 | 
182 |   const handleCopyUrl = (url: string) => {
183 |     navigator.clipboard.writeText(url);
184 |   };
185 | 
186 |   return (
187 |     <Box sx={{ width: '100%' }}>
188 |       <Paper elevation={3} sx={{ p: 3, mb: 3 }}>
189 |         <Stack spacing={3}>
190 |           <Typography variant="h6" gutterBottom>
191 |             Filters
192 |           </Typography>
193 |           <Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap' }}>
194 |             <FormControl sx={{ minWidth: 200 }}>
195 |               <InputLabel>Sponsor Status</InputLabel>
196 |               <Select
197 |                 value={sponsorFilter}
198 |                 label="Sponsor Status"
199 |                 onChange={(e) => setSponsorFilter(e.target.value)}
200 |               >
201 |                 <MenuItem value="">All</MenuItem>
202 |                 <MenuItem value="sponsored">Sponsored</MenuItem>
203 |                 <MenuItem value="not_sponsored">Not Sponsored</MenuItem>
204 |               </Select>
205 |             </FormControl>
206 |             <TextField
207 |               label="Search by Sponsor"
208 |               value={sponsorSearch}
209 |               onChange={(e) => setSponsorSearch(e.target.value)}
210 |               placeholder="Enter sponsor name..."
211 |               sx={{ minWidth: 200 }}
212 |             />
213 |             <TextField
214 |               label="Search by Creator"
215 |               value={creatorFilter}
216 |               onChange={(e) => setCreatorFilter(e.target.value)}
217 |               placeholder="Enter creator name..."
218 |               sx={{ minWidth: 200 }}
219 |             />
220 |           </Box>
221 |         </Stack>
222 |       </Paper>
223 | 
224 |       {isLoading && videos.length === 0 ? (
225 |         <Box display="flex" justifyContent="center" p={4}>
226 |           <CircularProgress />
227 |         </Box>
228 |       ) : error ? (
229 |         <Typography color="error" align="center" sx={{ p: 4 }}>{error}</Typography>
230 |       ) : (
231 |         <Paper elevation={3}>
232 |           <TableContainer>
233 |             <Table sx={{ minWidth: 750 }} size="medium">
234 |               <TableHead>
235 |                 <TableRow>
236 |                   {headCells.map((headCell) => (
237 |                     <TableCell
238 |                       key={headCell.id}
239 |                       align={headCell.numeric ? 'right' : 'left'}
240 |                       sortDirection={orderBy === headCell.id ? order : false}
241 |                       sx={{ 
242 |                         whiteSpace: 'nowrap',
243 |                         fontWeight: 'bold'
244 |                       }}
245 |                     >
246 |                       {headCell.sortable ? (
247 |                         <TableSortLabel
248 |                           active={orderBy === headCell.id}
249 |                           direction={orderBy === headCell.id ? order : 'asc'}
250 |                           onClick={() => handleRequestSort(headCell.id)}
251 |                         >
252 |                           {headCell.label}
253 |                         </TableSortLabel>
254 |                       ) : (
255 |                         headCell.label
256 |                       )}
257 |                     </TableCell>
258 |                   ))}
259 |                   <TableCell>Actions</TableCell>
260 |                 </TableRow>
261 |               </TableHead>
262 |               <TableBody>
263 |                 {videos.length === 0 ? (
264 |                   <TableRow>
265 |                     <TableCell colSpan={headCells.length + 1} align="center">
266 |                       <Typography sx={{ py: 2 }}>No videos found</Typography>
267 |                     </TableCell>
268 |                   </TableRow>
269 |                 ) : (
270 |                   videos.map((video) => (
271 |                     <TableRow
272 |                       key={video.id}
273 |                       hover
274 |                       sx={{ '&:last-child td, &:last-child th': { border: 0 } }}
275 |                     >
276 |                       <TableCell align="right">{video.id}</TableCell>
277 |                       <TableCell>
278 |                         <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
279 |                           <Typography
280 |                             sx={{
281 |                               maxWidth: '300px',
282 |                               overflow: 'hidden',
283 |                               textOverflow: 'ellipsis',
284 |                               whiteSpace: 'nowrap'
285 |                             }}
286 |                           >
287 |                             {video.url}
288 |                           </Typography>
289 |                           <Tooltip title="Copy URL">
290 |                             <IconButton
291 |                               size="small"
292 |                               onClick={() => handleCopyUrl(video.url)}
293 |                             >
294 |                               <ContentCopyIcon fontSize="small" />
295 |                             </IconButton>
296 |                           </Tooltip>
297 |                         </Box>
298 |                       </TableCell>
299 |                       <TableCell>
300 |                         <Typography
301 |                           sx={{
302 |                             maxWidth: '300px',
303 |                             overflow: 'hidden',
304 |                             textOverflow: 'ellipsis',
305 |                             whiteSpace: 'nowrap'
306 |                           }}
307 |                         >
308 |                           {video.video_metadata?.metadata_json?.title || '-'}
309 |                         </Typography>
310 |                       </TableCell>
311 |                       <TableCell align="right">
312 |                         {video.video_metadata?.metadata_json?.views?.toLocaleString() || '-'}
313 |                       </TableCell>
314 |                       <TableCell>
315 |                         {video.video_metadata?.metadata_json?.date_posted
316 |                           ? new Date(video.video_metadata.metadata_json.date_posted).toLocaleDateString()
317 |                           : '-'
318 |                         }
319 |                       </TableCell>
320 |                       <TableCell>
321 |                         <Chip 
322 |                           label={video.status.toUpperCase()} 
323 |                           color={getStatusColor(video.status) as any}
324 |                           size="small"
325 |                         />
326 |                       </TableCell>
327 |                       <TableCell>
328 |                         {video.sponsor?.status === 'completed' ? (
329 |                           <Tooltip title={video.sponsor.is_sponsored ? video.sponsor.brands_mentioned.map(b => b.name).join(', ') : ''}>
330 |                             <Chip 
331 |                               label={video.sponsor.is_sponsored ? 'Sponsored' : 'Not Sponsored'}
332 |                               color={video.sponsor.is_sponsored ? 'primary' : 'default'}
333 |                               size="small"
334 |                             />
335 |                           </Tooltip>
336 |                         ) : (
337 |                           <Chip 
338 |                             label={video.sponsor?.status || 'pending'}
339 |                             color={
340 |                               video.sponsor?.status === 'failed' ? 'error' :
341 |                               video.sponsor?.status === 'in_progress' ? 'warning' : 'default'
342 |                             }
343 |                             size="small"
344 |                           />
345 |                         )}
346 |                       </TableCell>
347 |                       <TableCell>
348 |                         {video.sponsor?.status === 'completed' ? (
349 |                           video.sponsor.is_sponsored ? (
350 |                             <Typography variant="body2">
351 |                               {video.sponsor.brands_mentioned.map(b => b.name).join(', ')}
352 |                             </Typography>
353 |                           ) : null
354 |                         ) : video.sponsor?.status === 'in_progress' ? (
355 |                           <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
356 |                             <CircularProgress size={16} />
357 |                             <Typography variant="body2" color="text.secondary">
358 |                               Loading...
359 |                             </Typography>
360 |                           </Box>
361 |                         ) : null}
362 |                       </TableCell>
363 |                       <TableCell>
364 |                         <Typography variant="body2">
365 |                           {video.video_metadata?.metadata_json?.youtuber || '-'}
366 |                         </Typography>
367 |                       </TableCell>
368 |                       <TableCell>
369 |                         <Link 
370 |                           component={RouterLink} 
371 |                           to={`/video/${video.id}`}
372 |                           color="primary"
373 |                         >
374 |                           View Details
375 |                         </Link>
376 |                       </TableCell>
377 |                     </TableRow>
378 |                   ))
379 |                 )}
380 |               </TableBody>
381 |             </Table>
382 |           </TableContainer>
383 |           <Box sx={{ display: 'flex', justifyContent: 'flex-end', p: 2 }}>
384 |             <TablePagination
385 |               component="div"
386 |               count={total}
387 |               page={page}
388 |               onPageChange={handleChangePage}
389 |               rowsPerPage={rowsPerPage}
390 |               onRowsPerPageChange={handleChangeRowsPerPage}
391 |               rowsPerPageOptions={[5, 10, 25, 50]}
392 |             />
393 |           </Box>
394 |         </Paper>
395 |       )}
396 |     </Box>
397 |   );
398 | };
399 | 


--------------------------------------------------------------------------------
/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2020",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
 7 |     "module": "ESNext",
 8 |     "skipLibCheck": true,
 9 | 
10 |     /* Bundler mode */
11 |     "moduleResolution": "bundler",
12 |     "allowImportingTsExtensions": true,
13 |     "isolatedModules": true,
14 |     "moduleDetection": "force",
15 |     "noEmit": true,
16 |     "jsx": "react-jsx",
17 | 
18 |     /* Linting */
19 |     "strict": true,
20 |     "noUnusedLocals": true,
21 |     "noUnusedParameters": true,
22 |     "noFallthroughCasesInSwitch": true,
23 |     "noUncheckedSideEffectImports": true
24 |   },
25 |   "include": ["src"]
26 | }
27 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files": [],
3 |   "references": [
4 |     { "path": "./tsconfig.app.json" },
5 |     { "path": "./tsconfig.node.json" }
6 |   ]
7 | }
8 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "lib": ["ES2023"],
 6 |     "module": "ESNext",
 7 |     "skipLibCheck": true,
 8 | 
 9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 | 
16 |     /* Linting */
17 |     "strict": true,
18 |     "noUnusedLocals": true,
19 |     "noUnusedParameters": true,
20 |     "noFallthroughCasesInSwitch": true,
21 |     "noUncheckedSideEffectImports": true
22 |   },
23 |   "include": ["vite.config.ts"]
24 | }
25 | 


--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite'
 2 | import react from '@vitejs/plugin-react'
 3 | 
 4 | // https://vitejs.dev/config/
 5 | export default defineConfig({
 6 |   plugins: [react()],
 7 |   server: {
 8 |     proxy: {
 9 |       '/api': {
10 |         target: 'http://localhost:8000',
11 |         changeOrigin: true,
12 |         rewrite: (path) => path.replace(/^\/api/, ''),
13 |       },
14 |     },
15 |   },
16 | })
17 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | fastapi==0.104.1
 2 | uvicorn==0.24.0
 3 | sqlalchemy==2.0.23
 4 | pydantic==2.5.2
 5 | python-dotenv==1.0.0
 6 | alembic==1.12.1
 7 | psycopg2-binary==2.9.9
 8 | httpx==0.25.2
 9 | ollama
10 | openai


--------------------------------------------------------------------------------
/sql_app.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techwithtim/AI-Sponsor-Detection/dad27c7350bf2561a3894cb55d24a920a83201cf/sql_app.db


--------------------------------------------------------------------------------