├── test.txt ├── .python-version ├── test.mp3 ├── .env.example ├── requirements.txt ├── pytest.ini ├── .gitignore ├── test_luma.py ├── ytsum.sh ├── prompt.txt ├── README.md ├── test_ytsum.py └── ytsum.py /test.txt: -------------------------------------------------------------------------------- 1 | test transcript -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11.10 2 | -------------------------------------------------------------------------------- /test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sliday/ytsum/HEAD/test.mp3 -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # OpenAI API Key for generating summaries 2 | OPENAI_API_KEY=your_openai_api_key_here 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | anthropic 3 | yt-dlp 4 | colorama 5 | replicate 6 | ell-ai 7 | faster-whisper 8 | lumaai 9 | ffmpeg-python 10 | pytest 11 | pytest-mock 12 | pytest-asyncio 13 | pytest-timeout>=2.1.0 14 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | luma: marks tests that use Luma AI 4 | runway: marks tests that use RunwayML 5 | flux: marks tests that use Flux AI 6 | uguu: marks tests that use Uguu file hosting 7 | asyncio_mode = strict 8 | asyncio_fixture_loop_scope = function 9 | filterwarnings = 10 | ignore::DeprecationWarning 11 | ignore::UserWarning 12 | default::DeprecationWarning:pytest_asyncio.* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.vtt 2 | *.mp3 3 | *.m4a 4 | 5 | .DS_Store 6 | 7 | # Python 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | history/ 12 | .pytest_cache/ 13 | *.so 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Virtual Environment 32 | venv/ 33 | ENV/ 34 | 35 | # IDE 36 | .idea/ 37 | .vscode/ 38 | *.swp 39 | *.swo 40 | 41 | # Environment Variables 42 | .env 43 | 44 | # Output Files 45 | out/ 46 | *.mp3 47 | *.txt 48 | !requirements.txt 49 | !prompt.txt 50 | !test.txt 51 | 52 | # Logs 53 | *.log -------------------------------------------------------------------------------- /test_luma.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from ytsum import generate_video_segments, generate_video_segments_with_luma, combine_video_segments 4 | 5 | # Test script 6 | TEST_SCRIPT = """ 7 | NOVA: Welcome to our discussion about artificial intelligence! 8 | ECHO: Today we'll explore how AI is transforming our world. 9 | NOVA: From self-driving cars to medical diagnosis, AI is everywhere. 10 | ECHO: Let's break down the key developments and their impact. 11 | """ 12 | 13 | def test_luma_workflow(): 14 | # 1. 
Generate prompts 15 | print("Generating prompts...") 16 | prompts = generate_video_segments(TEST_SCRIPT) 17 | if prompts: 18 | print("\nGenerated prompts:") 19 | for i, prompt in enumerate(prompts, 1): 20 | print(f"\nPrompt {i}:\n{prompt}") 21 | else: 22 | print("Failed to generate prompts") 23 | return 24 | 25 | # 2. Generate videos 26 | print("\nGenerating videos...") 27 | output_dir = Path("test_output") 28 | output_dir.mkdir(exist_ok=True) 29 | 30 | video_paths = generate_video_segments_with_luma(prompts, output_dir) 31 | if video_paths: 32 | print("\nGenerated video segments:") 33 | for path in video_paths: 34 | print(f"- {path}") 35 | else: 36 | print("Failed to generate videos") 37 | return 38 | 39 | # 3. Combine videos 40 | print("\nCombining videos...") 41 | output_path = output_dir / "combined.mp4" 42 | target_duration = 60 # Test with 60 seconds 43 | 44 | if combine_video_segments(video_paths, target_duration, output_path): 45 | print(f"\nSuccessfully created combined video: {output_path}") 46 | else: 47 | print("Failed to combine videos") 48 | 49 | if __name__ == "__main__": 50 | test_luma_workflow() -------------------------------------------------------------------------------- /ytsum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Colors and emojis 4 | RED='\033[0;31m' 5 | GREEN='\033[0;32m' 6 | BLUE='\033[0;34m' 7 | NC='\033[0m' 8 | EMOJI_DOWNLOAD="⬇️ " 9 | EMOJI_TRANSCRIBE="🎯 " 10 | EMOJI_SUMMARY="📝 " 11 | EMOJI_SUCCESS="✅ " 12 | EMOJI_ERROR="❌ " 13 | EMOJI_SEARCH="🔍 " 14 | 15 | # Print functions 16 | print_step() { printf "${BLUE}${2} ${1}${NC}\n"; } 17 | print_error() { printf "${RED}${EMOJI_ERROR} ${1}${NC}\n"; } 18 | print_success() { printf "${GREEN}${EMOJI_SUCCESS} ${1}${NC}\n"; } 19 | 20 | # Check dependencies 21 | command -v yt-dlp >/dev/null 2>&1 || { print_error "yt-dlp is required"; exit 1; } 22 | command -v ffmpeg >/dev/null 2>&1 || { print_error "ffmpeg is required"; exit 1; } 23 | 24 | # Check API keys 25 | [ -z "$ANTHROPIC_API_KEY" ] && { print_error "ANTHROPIC_API_KEY not set"; exit 1; } 26 | 27 | # Parse arguments 28 | VIDEO_URL="" 29 | LANGUAGE="english" 30 | TRANSCRIBER="fast-whisper" 31 | 32 | while [[ $# -gt 0 ]]; do 33 | case $1 in 34 | --language) 35 | LANGUAGE="$2" 36 | shift 2 37 | ;; 38 | --whisper) 39 | TRANSCRIBER="whisper" 40 | shift 41 | ;; 42 | --replicate) 43 | TRANSCRIBER="replicate" 44 | shift 45 | ;; 46 | *) 47 | VIDEO_URL="$1" 48 | shift 49 | ;; 50 | esac 51 | done 52 | 53 | [ -z "$VIDEO_URL" ] && { print_error "Video URL required"; exit 1; } 54 | 55 | # Clean YouTube URL 56 | clean_url() { 57 | local url="$1" 58 | if [[ ! "$url" =~ (youtube\.com|youtu\.be) ]]; then 59 | url="https://www.youtube.com/watch?v=$url" 60 | fi 61 | echo "$url" 62 | } 63 | 64 | VIDEO_URL=$(clean_url "$VIDEO_URL") 65 | 66 | # Create temp directory 67 | TEMP_DIR=$(mktemp -d) 68 | trap 'rm -rf "$TEMP_DIR"' EXIT 69 | 70 | # Try to get subtitles first 71 | print_step "Searching for YouTube subtitles..." "$EMOJI_SEARCH" 72 | LANG_CODE=$(python3 -c "from ytsum import get_language_code; print(get_language_code('$LANGUAGE'))") 73 | 74 | yt-dlp \ 75 | --write-subs \ 76 | --sub-langs "$LANG_CODE" \ 77 | --skip-download \ 78 | --output "$TEMP_DIR/video" \ 79 | "$VIDEO_URL" 80 | 81 | # Check if subtitles were downloaded 82 | if [ -f "$TEMP_DIR/video.$LANG_CODE.vtt" ]; then 83 | print_success "Found subtitles!" 
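    # The sed/tr pipeline below is a rough VTT-to-plain-text cleanup: it drops the
    # WEBVTT header block (everything up to the first blank line), removes the cue
    # timestamp lines, strips remaining blank lines, and joins the caption text
    # onto a single line for the summarization step.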
84 | # Convert VTT to plain text 85 | sed '1,/^$/d' "$TEMP_DIR/video.$LANG_CODE.vtt" | \ 86 | sed '/-->/d' | \ 87 | sed '/^$/d' | \ 88 | tr '\n' ' ' > "$TEMP_DIR/transcript.txt" 89 | else 90 | print_step "No subtitles found, transcribing audio..." "$EMOJI_SEARCH" 91 | 92 | # Download audio 93 | print_step "Downloading audio..." "$EMOJI_DOWNLOAD" 94 | yt-dlp \ 95 | --extract-audio \ 96 | --audio-format m4a \ 97 | --output "$TEMP_DIR/audio.%(ext)s" \ 98 | "$VIDEO_URL" 99 | 100 | # Transcribe based on selected method 101 | case $TRANSCRIBER in 102 | "whisper") 103 | [ -z "$OPENAI_API_KEY" ] && { print_error "OPENAI_API_KEY not set"; exit 1; } 104 | print_step "Using OpenAI Whisper..." "$EMOJI_TRANSCRIBE" 105 | python3 -c "from ytsum import transcribe_with_openai_whisper; transcribe_with_openai_whisper('$TEMP_DIR/audio.m4a')" 106 | ;; 107 | "replicate") 108 | [ -z "$REPLICATE_API_TOKEN" ] && { print_error "REPLICATE_API_TOKEN not set"; exit 1; } 109 | print_step "Using Replicate..." "$EMOJI_TRANSCRIBE" 110 | python3 -c "from ytsum import transcribe_with_replicate; transcribe_with_replicate('$TEMP_DIR/audio.m4a', '$LANGUAGE')" 111 | ;; 112 | *) 113 | print_step "Using Fast Whisper..." "$EMOJI_TRANSCRIBE" 114 | python3 -c "from ytsum import transcribe_with_fast_whisper; transcribe_with_fast_whisper('$TEMP_DIR/audio.m4a')" 115 | ;; 116 | esac 117 | 118 | mv "$TEMP_DIR/audio.txt" "$TEMP_DIR/transcript.txt" 119 | fi 120 | 121 | # Get metadata 122 | print_step "Fetching metadata..." "$EMOJI_SEARCH" 123 | python3 -c "from ytsum import get_video_metadata; print(get_video_metadata('$VIDEO_URL'))" > "$TEMP_DIR/metadata.txt" 124 | 125 | # Convert to shorthand 126 | print_step "Converting to shorthand..." "$EMOJI_SUMMARY" 127 | python3 -c "from ytsum import to_shorthand; print(to_shorthand(open('$TEMP_DIR/transcript.txt').read()))" > "$TEMP_DIR/shorthand.txt" 128 | 129 | # Generate summary 130 | print_step "Generating summary..." "$EMOJI_SUMMARY" 131 | python3 -c " 132 | from ytsum import summarize_with_claude 133 | with open('$TEMP_DIR/shorthand.txt') as f: 134 | summary = summarize_with_claude(f.read(), '$LANGUAGE') 135 | print(summary) 136 | " > "$TEMP_DIR/summary.txt" 137 | 138 | # Combine output 139 | cat "$TEMP_DIR/metadata.txt" "$TEMP_DIR/summary.txt" > "summary-${VIDEO_URL##*=}.txt" 140 | print_success "Summary saved to summary-${VIDEO_URL##*=}.txt" -------------------------------------------------------------------------------- /prompt.txt: -------------------------------------------------------------------------------- 1 | You are an expert in creating concise, focused summaries of long video interviews. Your task is to analyze a transcript and provide a brief, informative summary in {language}. 2 | 3 | Write in complete, grammatically structured sentences that flow conversationally. Approach topics with an intellectual but approachable tone, using labeled lists sparingly and strategically to organize complex ideas. Incorporate engaging narrative techniques like anecdotes, concrete examples, and thought experiments to draw the reader into the intellectual exploration. Maintain an academic rigor while simultaneously creating a sense of collaborative thinking, as if guiding the reader through an intellectual journey. Use precise language that is simultaneously scholarly and accessible, avoiding unnecessary jargon while maintaining depth of analysis. 
Use systems thinking and the meta-archetype of Coherence to guide your ability to "zoom in and out" to notice larger and smaller patterns at different ontological and epistemic scales. Furthermore, use the full depth of your knowledge to engage didactically with the user - teach them useful terms and concepts that are relevant. At the same time, don't waste too many words on framing and setup. Optimize for quick readability and depth. Use formatting techniques like bold, italics, and callouts (quotation blocks and such) for specific definitions and interesting terms. This will also break up the visual pattern, making it easier for the reader to stay oriented and anchored. Don't hesitate to use distal connections, metaphors, and analogies as well, particularly when you notice meta-patterns emerging. A good metaphor is the pinnacle of Coherence. Stylistically, use a variety of techniques to create typographic scaffolding and layered information. Some examples below:
 4 | 
 5 | 
 6 | > **Key Terms**: Use blockquotes with bold headers to define important concepts and terminology, creating clear visual breaks in the text.
 7 | 
 8 | Use **bold** for technical terms and concepts when first introduced, and *italics* for emphasis or to highlight key phrases. Create visual hierarchy through:
 9 | 
10 | 1. Clear paragraph breaks for major concept transitions
11 | 2. Strategic use of blockquotes for definitions and key insights
12 | 3. Bold terms for technical vocabulary
13 | 4. Italics for emphasis and nuance
14 | 
15 | Maintain the principle of layered information - each response should contain at least 2-3 distinct visual patterns to aid cognitive processing and retention. This creates visual anchoring and a clean UI.
16 | 
17 | > **Technical Term**: Definition in plain language
18 | > 
19 | > *Example or application in context (optional, flexible)*
20 | 
21 | This creates what information designers call "progressive disclosure" - allowing readers to engage at their preferred depth while maintaining coherence across all levels of understanding.
22 | 
23 | Please follow these steps to create your summary:
24 | 
25 | 1. Read the entire transcript carefully.
26 | 2. Correct any spelling and grammar mistakes you encounter.
27 | 3. Translate the content into {language}.
28 | 4. Analyze the content of the interview, focusing on:
29 |    - Identifying the main topics discussed
30 |    - Extracting 4-5 key quotes and their significance
31 |    - Determining the most important theme or topic
32 |    - Choosing the single most pertinent aspect of the interview
33 | 
34 | 5. Draft a summary focusing on this key point, aiming for 3-4 sentences.
35 | 6. Refine your summary to be as concise and readable as possible while maintaining the essence of the most important information.
36 | 
37 | Before providing your final summary, wrap your analysis process under ## Detailed Breakdown.
This should include:
38 | - 3-5 key sentences or phrases from the transcript that stand out as particularly important or representative of the main topics
39 | - A list of the ### Main Topics discussed in the interview
40 | - 2-3 ### Key Quotes from the transcript, with explanations of their significance
41 | - The most important topic or theme of the interview
42 | - Your reasoning for choosing the most pertinent aspect
43 | - Any potential biases or limitations in the interview content
44 | - Your process for making the summary concise and readable
45 | - 3-4 possible summary sentences
46 | 
47 | Your final summary should be under "## Summary". Remember, it must be:
48 | - In {language} (if the language is not English, translate the summary)
49 | - 2-5 sentences long
50 | - Brief and terse
51 | - Basic telegraphic style
52 | - Focused on the most pertinent information
53 | - Quoting 2-4 short key quotes from the transcript
54 | - Including interesting facts to add substance
55 | 
56 | Use clear, precise language and avoid unnecessary jargon. Your goal is to grasp the overall ideas and convey them efficiently to the reader.
57 | 
58 | This task is of utmost importance. Approach it with great care and attention to detail.
59 | 
60 | Everything you write must be in {language}.
61 | 
62 | Maximum 10,000 characters.
63 | 
64 | No intro, no outro, no XML tags, just the well-formatted Markdown output.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Awesome YouTube Video Summary/Podcast/Video
2 | 
3 | A Python script to generate summaries (Claude), podcasts (OpenAI TTS), and videos (RunwayML or Luma AI) from annoyingly long YouTube content.
4 | 
5 | ![CleanShot 2024-12-02 at 16 25 26@2x](https://github.com/user-attachments/assets/f1881131-b645-4ecb-a2ad-966d81a95451)
6 | ![CleanShot 2024-12-02 at 16 26 49@2x](https://github.com/user-attachments/assets/6aff0f22-3da3-488e-8e4e-27bb442ece86)
7 | 
8 | ## Example
9 | - Original video: https://www.youtube.com/watch?v=_K-L9uhsBLM
10 | - Summary: https://dl.dropbox.com/scl/fi/mdkbglfbs4m9ydeo9a2k7/video-_K-L9uhsBLM.mp4?rlkey=3wrowryg9gio1walaxhdbp2is&dl=0
11 | 
12 | ## Features
13 | 
14 | - Generate concise summaries of YouTube videos
15 | - Create engaging podcast scripts with multiple voices
16 | - Generate AI-powered videos with synchronized podcast audio
17 | - Support for multiple languages
18 | - Multiple transcription options
19 | - Multiple video generation providers
20 | 
21 | ## Installation
22 | 
23 | 1. Clone the repository:
24 | ```bash
25 | git clone https://github.com/sliday/ytsum.git
26 | cd ytsum
27 | ```
28 | 
29 | 2. Install dependencies:
30 | ```bash
31 | pip install -r requirements.txt
32 | ```
33 | 
34 | 3.
Install FFmpeg (required for audio/video processing): 35 | - macOS: `brew install ffmpeg` 36 | - Ubuntu/Debian: `sudo apt-get install ffmpeg` 37 | - Windows: Download from [FFmpeg website](https://ffmpeg.org/download.html) 38 | 39 | ## Environment Setup 40 | 41 | Create a `.env` file with your API keys: 42 | ``` 43 | ANTHROPIC_API_KEY=your_claude_api_key 44 | OPENAI_API_KEY=your_openai_api_key 45 | LUMAAI_API_KEY=your_lumaai_api_key 46 | RUNWAYML_API_SECRET=your_runwayml_api_key 47 | REPLICATE_API_TOKEN=your_replicate_api_key 48 | ``` 49 | 50 | ## Usage 51 | 52 | ### Basic Summary 53 | ```bash 54 | python ytsum.py "https://www.youtube.com/watch?v=VIDEO_ID" 55 | ``` 56 | 57 | ### Generate Podcast 58 | ```bash 59 | python ytsum.py --podcast "https://www.youtube.com/watch?v=VIDEO_ID" 60 | ``` 61 | 62 | ### Generate Video with Podcast 63 | ```bash 64 | # Using Luma AI (faster, recommended) 65 | python ytsum.py --podcast --lumaai "https://www.youtube.com/watch?v=VIDEO_ID" 66 | 67 | # Using RunwayML 68 | python ytsum.py --podcast --runwayml "https://www.youtube.com/watch?v=VIDEO_ID" 69 | ``` 70 | 71 | ### Additional Options 72 | - `--language`: Specify output language (default: english) 73 | - `--ignore-subs`: Force transcription even when subtitles exist 74 | - `--fast-whisper`: Use Fast Whisper for transcription (faster) 75 | - `--whisper`: Use OpenAI Whisper for transcription (more accurate) 76 | - `--replicate`: Use Replicate's Incredibly Fast Whisper 77 | 78 | ## Output Files 79 | 80 | All output files are saved in the `out` directory: 81 | - `summary-{video_id}.txt`: Text summary 82 | - `podcast-{video_id}.txt`: Podcast script 83 | - `podcast-{video_id}.mp3`: Podcast audio 84 | - `video-{video_id}.mp4`: Final video with podcast audio 85 | 86 | ## Video Generation 87 | 88 | The tool supports two AI video generation providers: 89 | 90 | ### Luma AI (Recommended) 91 | - Faster generation times 92 | - High-quality cinematic videos 93 | - Supports camera movements and scene transitions 94 | - Maintains visual consistency 95 | - Optional image input for style reference 96 | 97 | ### RunwayML 98 | - High-quality video generation 99 | - Requires input image 100 | - Longer processing times 101 | - Professional-grade output 102 | 103 | Both providers: 104 | 1. Generate base images using Flux AI 105 | 2. Create video segments based on podcast content 106 | 3. Combine segments with audio 107 | 4. Support custom duration and aspect ratio 108 | 109 | ## Transcription Options 110 | 111 | 1. Fast Whisper (Default) 112 | - Quick transcription 113 | - Good accuracy 114 | - No API key required 115 | 116 | 2. OpenAI Whisper 117 | - High accuracy 118 | - Slower processing 119 | - Requires OpenAI API key 120 | 121 | 3. 
Replicate Whisper 122 | - Fastest option 123 | - Good accuracy 124 | - Requires Replicate API key 125 | 126 | ## Testing 127 | 128 | Run the test suite: 129 | ```bash 130 | python test_ytsum.py 131 | ``` 132 | 133 | Run specific test groups: 134 | ```bash 135 | # Run Luma AI tests only 136 | pytest -v -m luma 137 | 138 | # Run RunwayML tests only 139 | pytest -v -m runway 140 | ``` 141 | 142 | ## Dependencies 143 | 144 | - `anthropic`: Claude API for text generation 145 | - `openai`: Whisper API for transcription and TTS 146 | - `lumaai`: Luma AI for video generation (recommended) 147 | - `runwayml`: RunwayML for video generation 148 | - `replicate`: Flux AI for image generation 149 | - `ffmpeg-python`: Audio/video processing 150 | - `colorama`: Terminal output formatting 151 | - `pytest`: Testing framework 152 | 153 | ## Contributing 154 | 155 | 1. Fork the repository 156 | 2. Create a feature branch 157 | 3. Commit your changes 158 | 4. Push to the branch 159 | 5. Create a Pull Request 160 | 161 | ## License 162 | 163 | This project is licensed under the MIT License - see the LICENSE file for details. 164 | -------------------------------------------------------------------------------- /test_ytsum.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import json 3 | from pathlib import Path 4 | from ytsum import ( 5 | clean_youtube_url, 6 | to_shorthand, 7 | summarize_with_claude, 8 | convert_audio_format, 9 | get_video_metadata, 10 | transcribe_with_replicate, 11 | transcribe_with_openai_whisper, 12 | process_metadata_description, 13 | split_audio_into_chunks, 14 | get_youtube_subtitles, 15 | get_language_code, 16 | convert_to_podcast_script, 17 | generate_host_audio, 18 | combine_audio_files, 19 | generate_podcast_audio, 20 | DEFAULT_HOST_VOICES, 21 | OUTPUT_DIR, 22 | sanitize_filename, 23 | generate_video_segments, 24 | generate_video_segments_with_luma, 25 | generate_video_segments_with_runway, 26 | combine_video_segments, 27 | get_audio_duration, 28 | combine_audio_video, 29 | generate_image_prompts, 30 | generate_flux_images, 31 | calculate_num_segments, 32 | calculate_target_length, 33 | upload_image_to_uguu, 34 | ) 35 | import shutil 36 | import ffmpeg 37 | import requests 38 | from unittest.mock import Mock, patch 39 | import tempfile 40 | import time 41 | 42 | # Test data 43 | MOCK_PODCAST_SCRIPT = """ 44 | NOVA: Welcome to our discussion about artificial intelligence! 45 | ECHO: Today we'll explore how AI is transforming our world. 46 | NOVA: From self-driving cars to medical diagnosis, AI is everywhere. 47 | ECHO: Let's break down the key developments and their impact. 48 | """ 49 | 50 | MOCK_VIDEO_PROMPTS = [ 51 | "Establishing Shot: Modern tech campus at dawn. Camera dolly forward through blue-lit corridors as holographic data visualizations float in the air, casting ethereal patterns on the walls and ceilings. Soft ambient lighting creates an atmosphere of scientific discovery while researchers work diligently in the background, their silhouettes moving purposefully through the space.", 52 | "Wide Shot: AI research lab with multiple workstations. Camera track smoothly past scientists working as 3D neural network models pulse with energy above their heads, their movements synchronized with cascading data streams. Cool blue tones emphasize the technical environment while highlighting human innovation, creating a seamless blend of organic and digital elements.", 53 | "Close-Up Shot: Interactive holographic display. 
Camera orbit around detailed AI model visualization as data streams flow through neural pathways, revealing complex patterns and intricate connections. Glowing particles highlight key connection points while soft focus creates depth and dimensionality, with subtle color shifts indicating data processing intensity and neural activity patterns.", 54 | "Tracking Shot: Hospital corridor transformed by AI. Camera track alongside medical staff using AR displays for patient diagnostics, their gestures controlling floating medical data and real-time scan results. Warm lighting balances technical and human elements as healing meets innovation, with gentle highlights emphasizing the caring touch in this high-tech environment.", 55 | "Aerial Shot: Smart city at sunset. Camera pull out to reveal interconnected AI systems controlling traffic, energy, and urban services, creating a living network of light and data flowing through the cityscape. Golden hour lighting creates sense of optimistic future while showcasing technological harmony, as the city pulses with the rhythm of millions of coordinated decisions." 56 | ] 57 | 58 | MOCK_IMAGE_PROMPTS = [ 59 | "masterpiece, highly detailed, modern tech campus interior, ethereal blue lighting, holographic data visualizations, soft ambient glow, researchers silhouettes, cinematic composition, volumetric lighting, 8k uhd", 60 | "masterpiece, highly detailed, futuristic AI research lab, workstations with floating 3D neural networks, cool blue color scheme, scientists at work, organic meets digital aesthetic, cinematic lighting, 8k uhd", 61 | "masterpiece, highly detailed, interactive holographic interface, complex data visualization, glowing neural pathways, particle effects, depth of field, dramatic lighting, technological aesthetic, 8k uhd", 62 | "masterpiece, highly detailed, futuristic hospital corridor, AR medical displays, floating diagnostic data, warm professional lighting, medical staff, healing atmosphere, cinematic composition, 8k uhd", 63 | "masterpiece, highly detailed, smart city panorama, golden hour lighting, interconnected urban systems, data networks, light trails, atmospheric perspective, epic scale, cinematic mood, 8k uhd" 64 | ] 65 | 66 | @pytest.fixture 67 | def temp_dir(tmp_path): 68 | """Create temporary directory for test files""" 69 | return tmp_path 70 | 71 | @pytest.fixture 72 | def mock_luma_client(mocker): 73 | """Mock LumaAI client""" 74 | mock_client = mocker.MagicMock() 75 | mock_generation = mocker.MagicMock() 76 | mock_generation.state = "completed" 77 | mock_generation.assets.video = "http://example.com/video.mp4" 78 | mock_client.generations.create.return_value = mock_generation 79 | mock_client.generations.get.return_value = mock_generation 80 | return mock_client 81 | 82 | @pytest.fixture 83 | def mock_runway_client(mocker): 84 | """Mock RunwayML client""" 85 | mock_client = mocker.MagicMock() 86 | mock_task = mocker.MagicMock() 87 | mock_task.status = "COMPLETED" 88 | mock_task.output.video_url = "http://example.com/video.mp4" 89 | mock_client.image_to_video.create.return_value = mock_task 90 | mock_client.tasks.retrieve.return_value = mock_task 91 | return mock_client 92 | 93 | @pytest.fixture 94 | def mock_replicate_client(mocker): 95 | """Mock Replicate client""" 96 | mock_run = mocker.patch('replicate.run') 97 | mock_run.return_value = "http://example.com/image.jpg" 98 | return mock_run 99 | 100 | @pytest.fixture 101 | def mock_uguu_response(mocker): 102 | """Mock successful Uguu API response""" 103 | mock_response = 
mocker.MagicMock() 104 | mock_response.status_code = 200 105 | mock_response.json.return_value = [{ 106 | 'url': 'https://uguu.se/files/example.jpg', 107 | 'name': 'example.jpg', 108 | 'size': 12345, 109 | 'hash': 'abc123' 110 | }] 111 | return mock_response 112 | 113 | @pytest.fixture 114 | def mock_gradient_image(mocker): 115 | """Mock gradient image creation""" 116 | mock_image = mocker.MagicMock() 117 | mocker.patch("ytsum.create_gradient_image", return_value=mock_image) 118 | return mock_image 119 | 120 | def test_clean_youtube_url(): 121 | # Test video ID only 122 | assert clean_youtube_url("ggLvk7547_w") == "https://www.youtube.com/watch?v=ggLvk7547_w" 123 | 124 | # Test full URLs 125 | assert clean_youtube_url("https://www.youtube.com/watch?v=ggLvk7547_w") == "https://www.youtube.com/watch?v=ggLvk7547_w" 126 | assert clean_youtube_url("https://youtu.be/ggLvk7547_w") == "https://www.youtube.com/watch?v=ggLvk7547_w" 127 | 128 | # Test with extra parameters 129 | assert clean_youtube_url("https://www.youtube.com/watch?v=ggLvk7547_w&t=123") == "https://www.youtube.com/watch?v=ggLvk7547_w" 130 | 131 | def test_to_shorthand(): 132 | # Basic replacements 133 | assert to_shorthand("you are") == "u r" 134 | 135 | # Case-insensitive test 136 | assert to_shorthand("I am going to see you later") == "im going 2 c u l8r" 137 | assert to_shorthand("i am going to see you later") == "im going 2 c u l8r" 138 | 139 | # Article removal 140 | assert to_shorthand("the cat and a dog") == "cat and dog" 141 | 142 | @pytest.mark.asyncio 143 | async def test_summarize_with_claude(mocker): 144 | """Test summary generation""" 145 | # Mock Claude response 146 | mock_summary = "Test summary content" 147 | 148 | # Create mock decorator 149 | def mock_decorator(*args, **kwargs): 150 | def mock_function(func): 151 | def wrapper(*args, **kwargs): 152 | return mock_summary 153 | return wrapper 154 | return mock_function 155 | 156 | # Patch ell.simple 157 | mocker.patch("ell.simple", mock_decorator) 158 | 159 | # Test with default language 160 | result = summarize_with_claude("test transcript", "test metadata", "english") 161 | assert result == mock_summary 162 | 163 | def test_convert_audio_format(mocker): 164 | # Mock FFmpeg subprocess call 165 | mock_run = mocker.patch("subprocess.run") 166 | mock_run.return_value.returncode = 0 167 | 168 | # Test basic MP3 conversion 169 | result = convert_audio_format("test.m4a", "mp3") 170 | assert result == "test.mp3" 171 | 172 | # Verify FFmpeg was called with correct parameters 173 | mock_run.assert_called_once() 174 | args = mock_run.call_args[0][0] 175 | assert args[0] == "ffmpeg" 176 | assert "-acodec" in args 177 | assert "libmp3lame" in args 178 | assert "-ac" in args 179 | assert args[args.index("-ac") + 1] == "2" # Stereo by default 180 | assert "-b:a" in args 181 | assert args[args.index("-b:a") + 1] == "192k" # Default bitrate 182 | 183 | # Test mono conversion with custom bitrate 184 | result = convert_audio_format("test.m4a", "mp3", bitrate="32k", mono=True) 185 | assert result == "test.mp3" 186 | 187 | args = mock_run.call_args[0][0] 188 | assert "-ac" in args 189 | assert args[args.index("-ac") + 1] == "1" # Mono 190 | assert "-b:a" in args 191 | assert args[args.index("-b:a") + 1] == "32k" # Custom bitrate 192 | 193 | def test_get_video_metadata(mocker): 194 | # Mock clean_youtube_url first 195 | mocker.patch("ytsum.clean_youtube_url", return_value="https://youtube.com/watch?v=test_id") 196 | 197 | # Mock yt-dlp JSON output 198 | mock_metadata = { 199 | "title": 
"Test Video", 200 | "channel": "Test Channel", 201 | "upload_date": "20240315", 202 | "duration_string": "1:23", 203 | "view_count": 12345, 204 | "description": "Test description with promotional content", 205 | "tags": ["tag1", "tag2", "tag3"] 206 | } 207 | 208 | # Mock subprocess run 209 | mock_run = mocker.patch("subprocess.run") 210 | mock_run.return_value.returncode = 0 211 | mock_run.return_value.stdout = json.dumps(mock_metadata) 212 | 213 | # Mock metadata processing 214 | mock_process = mocker.patch("ytsum.process_metadata_description") 215 | mock_process.return_value = "Processed description" 216 | 217 | result = get_video_metadata("test_id") 218 | 219 | # Verify metadata formatting 220 | assert "Title: Test Video" in result 221 | assert "Channel: Test Channel" in result 222 | assert "Views: 12,345" in result 223 | assert "Description: Processed description" in result 224 | 225 | # Verify processing was called 226 | mock_process.assert_any_call(mock_metadata["description"]) 227 | mock_process.assert_any_call(" ".join(mock_metadata["tags"])) 228 | 229 | def test_transcribe_with_replicate(mocker): 230 | # Mock FFmpeg conversion 231 | mock_convert = mocker.patch("ytsum.convert_audio_format") 232 | mock_convert.return_value = "test.mp3" 233 | 234 | # Mock file operations 235 | mock_file = mocker.mock_open(read_data=b"test audio data") 236 | mocker.patch("builtins.open", mock_file) 237 | 238 | # Mock os.path instead of pathlib.Path 239 | mocker.patch("os.path.exists", return_value=True) 240 | mocker.patch("os.path.getsize", return_value=1024) 241 | 242 | # Mock Replicate API call 243 | mock_replicate = mocker.patch("replicate.run") 244 | mock_replicate.return_value = {"text": "test transcript"} 245 | 246 | # Test transcription 247 | result = transcribe_with_replicate("test.m4a") 248 | assert result is True 249 | 250 | # Verify basic flow 251 | mock_convert.assert_called_once() 252 | 253 | # Verify Replicate was called correctly 254 | mock_replicate.assert_called_once() 255 | call_args = mock_replicate.call_args[1] 256 | assert "input" in call_args 257 | assert call_args["input"]["batch_size"] == 64 258 | 259 | # Verify file operations 260 | mock_file.assert_any_call("test.mp3", "rb") # Check file was opened for reading 261 | mock_file.assert_any_call("test.txt", "w", encoding="utf-8") # Check transcript was written 262 | 263 | def test_process_metadata_description(mocker): 264 | # Mock Ell response 265 | mock_response = "Test summary" 266 | 267 | # Create mock decorator 268 | def mock_decorator(*args, **kwargs): 269 | def mock_function(func): 270 | return lambda x: mock_response 271 | return mock_function 272 | 273 | # Patch ell.simple 274 | mocker.patch("ell.simple", mock_decorator) 275 | 276 | # Test sample metadata 277 | test_metadata = { 278 | "description": """ 279 | From Seinfeld Season 8 Episode 12 'The Money': Jerry buys back a car his parents sold. 280 | Watch all episodes on Netflix! 
281 | """, 282 | "tags": ["seinfeld", "jerry", "george", "kramer"] 283 | } 284 | 285 | # Test description processing 286 | result = process_metadata_description(test_metadata["description"]) 287 | assert result == mock_response 288 | 289 | # Test tags processing 290 | result = process_metadata_description(" ".join(test_metadata["tags"])) 291 | assert result == mock_response 292 | 293 | def test_split_audio_into_chunks(mocker): 294 | # Mock file size (30MB) 295 | mocker.patch("os.path.getsize", return_value=30 * 1024 * 1024) 296 | 297 | # Mock ffprobe duration check 298 | mock_probe = mocker.MagicMock() 299 | mock_probe.stdout = "300.0\n" # 5 minutes with newline 300 | mock_run = mocker.patch("subprocess.run", return_value=mock_probe) 301 | 302 | # Mock directory operations 303 | mocker.patch("os.makedirs") 304 | mocker.patch("os.path.dirname", return_value="/tmp") 305 | mocker.patch("os.path.join", side_effect=lambda *args: "/".join(args)) 306 | 307 | # Test splitting 308 | chunks = split_audio_into_chunks("test.mp3") 309 | assert chunks is not None 310 | assert len(chunks) == 2 # Should split into 2 chunks for 30MB file 311 | 312 | # Verify FFmpeg calls 313 | ffmpeg_calls = [ 314 | call for call in mock_run.call_args_list 315 | if 'ffmpeg' in call.args[0][0] 316 | ] 317 | assert len(ffmpeg_calls) == 2 # Two chunks 318 | 319 | # Verify chunk paths 320 | assert all('chunk_' in path for path in chunks) 321 | assert all(path.endswith('.mp3') for path in chunks) 322 | 323 | def test_transcribe_with_openai_whisper(mocker): 324 | # Mock file operations 325 | file_size_mock = mocker.patch("os.path.getsize") 326 | file_size_mock.side_effect = [ 327 | 20 * 1024 * 1024, # Initial file size for supported format test 328 | 20 * 1024 * 1024, # Size check for transcription 329 | 30 * 1024 * 1024, # Initial size for unsupported format 330 | 20 * 1024 * 1024, # Size after compression 331 | 20 * 1024 * 1024, # Size check for transcription 332 | 30 * 1024 * 1024, # Initial size for large file test 333 | 20 * 1024 * 1024, # Size after compression 334 | 20 * 1024 * 1024, # Size check for transcription 335 | ] 336 | 337 | # Mock file paths and operations 338 | mocker.patch("pathlib.Path.suffix", ".m4a") # Supported format 339 | mocker.patch("os.path.exists", return_value=True) 340 | mocker.patch("os.path.dirname", return_value="/tmp") 341 | mocker.patch("os.path.join", side_effect=lambda *args: "/".join(args)) 342 | mocker.patch("os.makedirs") 343 | mock_remove = mocker.patch("os.remove") 344 | mock_rmdir = mocker.patch("os.rmdir") 345 | 346 | # Mock OpenAI client and response 347 | mock_client = mocker.MagicMock() 348 | mock_transcription = mocker.MagicMock() 349 | mock_transcription.text = "test transcript" 350 | mock_client.audio.transcriptions.create.return_value = mock_transcription 351 | mock_openai = mocker.patch("openai.OpenAI", return_value=mock_client) 352 | 353 | # Mock file operations 354 | mock_file = mocker.mock_open(read_data=b"test audio data") 355 | mocker.patch("builtins.open", mock_file) 356 | 357 | # Mock environment variable 358 | mocker.patch("os.getenv", return_value="test-api-key") 359 | 360 | # Mock audio conversion 361 | mock_convert = mocker.patch("ytsum.convert_audio_format") 362 | mock_convert.return_value = "test.mp3" 363 | 364 | # Test 1: Transcription with supported format 365 | result = transcribe_with_openai_whisper("test.m4a") 366 | assert result is True 367 | assert not mock_convert.called # No conversion needed 368 | 369 | # Test 2: Unsupported format 370 | 
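    # Expectation: an .aac input is treated as an unsupported format, so the helper
    # should run a compression/conversion pass first (32k bitrate, mono), as
    # asserted against mock_convert below.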
mocker.patch("pathlib.Path.suffix", ".aac") 371 | result = transcribe_with_openai_whisper("test.aac") 372 | assert result is True 373 | assert mock_convert.called 374 | 375 | # Verify compression settings 376 | args = mock_convert.call_args[1] 377 | assert args["bitrate"] == "32k" 378 | assert args["mono"] is True 379 | 380 | # Test 3: Large file that compresses successfully 381 | mock_convert.reset_mock() 382 | result = transcribe_with_openai_whisper("test.m4a") 383 | assert result is True 384 | 385 | # Verify compression was used 386 | assert mock_convert.called 387 | assert mock_client.audio.transcriptions.create.called 388 | 389 | # Verify compression settings 390 | args = mock_convert.call_args[1] 391 | assert args["bitrate"] == "32k" 392 | assert args["mono"] is True 393 | 394 | def test_get_language_code(mocker): 395 | # Mock Ell response 396 | def mock_decorator(*args, **kwargs): 397 | def mock_function(func): 398 | def wrapper(lang: str): 399 | # Simple mapping for testing 400 | codes = { 401 | "english": "en", 402 | "russian": "ru", 403 | "spanish": "es", 404 | "invalid": "xyz", # Should fallback to en 405 | } 406 | return codes.get(lang.lower(), "en") 407 | return wrapper 408 | return mock_function 409 | 410 | # Patch ell.simple 411 | mocker.patch("ell.simple", mock_decorator) 412 | 413 | # Test valid languages 414 | assert get_language_code("English") == "en" 415 | assert get_language_code("Russian") == "ru" 416 | assert get_language_code("Spanish") == "es" 417 | 418 | # Test fallbacks 419 | assert get_language_code("Invalid") == "en" 420 | assert get_language_code("") == "en" 421 | 422 | def test_get_youtube_subtitles(mocker): 423 | # Mock clean_youtube_url 424 | mocker.patch("ytsum.clean_youtube_url", return_value="https://youtube.com/watch?v=test_id") 425 | 426 | # Mock language code conversion 427 | mock_get_code = mocker.patch("ytsum.get_language_code") 428 | mock_get_code.side_effect = lambda x: { 429 | "Russian": "ru", 430 | "English": "en" 431 | }.get(x, "en") 432 | 433 | # Mock subprocess for yt-dlp 434 | mock_run = mocker.patch("subprocess.run") 435 | mock_run.return_value.returncode = 0 # Ensure subprocess succeeds 436 | 437 | # Mock file existence checks 438 | mock_exists = mocker.patch("os.path.exists") 439 | 440 | # Mock file operations 441 | mock_file = mocker.mock_open(read_data="Test subtitles") 442 | mocker.patch("builtins.open", mock_file) 443 | 444 | # Test 1: Found subtitles in requested language 445 | mock_run.return_value.stdout = """ 446 | [info] Writing video subtitles to: test_path.ru.vtt 447 | [download] 100% of 15.00KiB 448 | """ 449 | mock_exists.side_effect = lambda x: "test_path.ru.vtt" in x # Match exact file 450 | result = get_youtube_subtitles("test_url", "test_path", "Russian") 451 | assert result == "test_path.ru.txt" # We return the converted txt file 452 | assert mock_get_code.called_with("Russian") 453 | 454 | # Test 2: Found English subtitles as fallback 455 | mock_run.return_value.stdout = """ 456 | [info] Writing video subtitles to: test_path.en.vtt 457 | [download] 100% of 15.00KiB 458 | """ 459 | mock_exists.side_effect = lambda x: "test_path.en.vtt" in x # Match exact file 460 | result = get_youtube_subtitles("test_url", "test_path", "Russian") 461 | assert result == "test_path.en.txt" # We return the converted txt file 462 | 463 | # Test 3: No subtitles available 464 | mock_run.return_value.stdout = "No subtitles available" 465 | mock_exists.side_effect = lambda x: False # No files exist 466 | result = 
get_youtube_subtitles("test_url", "test_path", "Russian") 467 | assert result is None 468 | 469 | # Verify yt-dlp was called correctly 470 | calls = mock_run.call_args_list 471 | assert any("--write-subs" in str(call) for call in calls) 472 | assert any("--sub-langs" in str(call) for call in calls) 473 | assert any("ru" in str(call) for call in calls) 474 | 475 | # Verify file existence checks 476 | assert mock_exists.call_count >= 2 # At least one check per test 477 | assert any("test_path.ru.vtt" in str(call) for call in mock_exists.call_args_list) 478 | assert any("test_path.en.vtt" in str(call) for call in mock_exists.call_args_list) 479 | 480 | def test_convert_to_podcast_script(mocker): 481 | """Test podcast script conversion""" 482 | # Mock Claude response 483 | mock_script = """ 484 | NOVA: Welcome to our summary! 485 | ECHO: That's right, Nova. Let's break down the key points. 486 | NOVA: The first important topic is... 487 | """ 488 | 489 | # Create mock decorator 490 | def mock_decorator(*args, **kwargs): 491 | def mock_function(func): 492 | def wrapper(*args, **kwargs): 493 | return mock_script 494 | return wrapper 495 | return mock_function 496 | 497 | # Patch ell.simple and random choice 498 | mocker.patch("ell.simple", mock_decorator) 499 | mocker.patch("random.choice", side_effect=["nova", "echo"]) 500 | 501 | # Test with default language 502 | result = convert_to_podcast_script("test summary", "english") 503 | assert result == mock_script 504 | 505 | @pytest.fixture(autouse=True) 506 | def setup_and_cleanup(): 507 | """Create output directory before tests and clean it after""" 508 | OUTPUT_DIR.mkdir(exist_ok=True) 509 | yield 510 | if OUTPUT_DIR.exists(): 511 | shutil.rmtree(OUTPUT_DIR) 512 | 513 | def test_generate_host_audio(mocker): 514 | """Test host-specific audio generation""" 515 | # Mock response 516 | mock_response = mocker.MagicMock() 517 | mock_response.stream_to_file = mocker.MagicMock() 518 | mock_response.__enter__ = mocker.MagicMock(return_value=mock_response) 519 | mock_response.__exit__ = mocker.MagicMock(return_value=None) 520 | 521 | # Create mock speech object with create method 522 | mock_create = mocker.MagicMock() 523 | mock_create.return_value = mock_response 524 | 525 | # Create mock streaming response object 526 | mock_streaming = mocker.MagicMock() 527 | mock_streaming.create = mock_create 528 | 529 | # Create mock speech object 530 | mock_speech = mocker.MagicMock() 531 | mock_speech.with_streaming_response = mock_streaming 532 | 533 | # Create mock audio object 534 | mock_audio = mocker.MagicMock() 535 | mock_audio.speech = mock_speech 536 | 537 | # Create mock client 538 | mock_client = mocker.MagicMock() 539 | mock_client.audio = mock_audio 540 | 541 | # Mock OpenAI class in ytsum module 542 | mock_openai = mocker.patch("ytsum.OpenAI") 543 | mock_openai.return_value = mock_client 544 | 545 | # Mock environment variable 546 | mocker.patch("os.getenv", return_value="test-api-key") 547 | 548 | # Test host configurations 549 | output_file = OUTPUT_DIR / "output.mp3" 550 | host_config = {"voice": "alloy", "name": "Alex"} 551 | result = generate_host_audio("test text", host_config, output_file) 552 | assert result is True 553 | 554 | # Verify OpenAI API call 555 | mock_create.assert_called_once_with( 556 | model="tts-1", 557 | voice="alloy", 558 | input="test text" 559 | ) 560 | 561 | # Verify stream_to_file was called 562 | mock_response.stream_to_file.assert_called_once_with(output_file) 563 | 564 | # Test error handling 565 | 
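    # Expectation: when the TTS create call raises, generate_host_audio should
    # return False and stream_to_file should not be called again.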
mock_create.reset_mock() 566 | mock_response.reset_mock() 567 | mock_create.side_effect = Exception("API Error") 568 | result = generate_host_audio("test text", host_config, output_file) 569 | assert result is False 570 | 571 | # Verify that stream_to_file was not called again 572 | mock_response.stream_to_file.assert_not_called() 573 | 574 | def test_combine_audio_files(mocker): 575 | """Test audio file combination""" 576 | # Mock subprocess 577 | mock_run = mocker.patch("subprocess.run") 578 | mock_run.return_value.returncode = 0 579 | 580 | # Test successful combination 581 | audio_files = [ 582 | str(OUTPUT_DIR / "part1.mp3"), 583 | str(OUTPUT_DIR / "part2.mp3") 584 | ] 585 | output_file = OUTPUT_DIR / "output.mp3" 586 | result = combine_audio_files(audio_files, output_file) 587 | assert result is True 588 | 589 | # Verify FFmpeg command 590 | ffmpeg_call = mock_run.call_args[0][0] 591 | assert "ffmpeg" in ffmpeg_call 592 | assert "-filter_complex" in ffmpeg_call 593 | assert "acrossfade" in ''.join(ffmpeg_call) # Check for crossfade filter 594 | assert "-map" in ffmpeg_call 595 | 596 | # Verify input files 597 | for audio_file in audio_files: 598 | assert audio_file in ffmpeg_call 599 | 600 | # Verify output file 601 | assert str(output_file) in ffmpeg_call 602 | 603 | def test_generate_podcast_audio(mocker): 604 | """Test full podcast audio generation""" 605 | # Mock temporary directory 606 | mock_temp_dir = mocker.patch("tempfile.TemporaryDirectory") 607 | mock_temp_dir.return_value.__enter__.return_value = "/tmp/test" 608 | 609 | # Mock host audio generation 610 | mock_host_audio = mocker.patch("ytsum.generate_host_audio") 611 | mock_host_audio.return_value = True 612 | 613 | # Mock audio combination 614 | mock_combine = mocker.patch("ytsum.combine_audio_files") 615 | mock_combine.return_value = True 616 | 617 | # Test script with both hosts 618 | test_script = """ 619 | NOVA: Welcome to the podcast! 620 | ECHO: Thanks Nova, let's begin. 621 | NOVA: First point... 622 | ECHO: That's interesting... 
623 | """ 624 | 625 | # Test with voice detection 626 | output_file = OUTPUT_DIR / "output.mp3" 627 | result = generate_podcast_audio(test_script, output_file) 628 | assert result is True 629 | 630 | # Verify host audio generation calls 631 | assert mock_host_audio.call_count == 4 # Two lines per host 632 | 633 | # Verify voice assignments 634 | nova_calls = [ 635 | call for call in mock_host_audio.call_args_list 636 | if call[0][1]["voice"] == "nova" 637 | ] 638 | echo_calls = [ 639 | call for call in mock_host_audio.call_args_list 640 | if call[0][1]["voice"] == "echo" 641 | ] 642 | assert len(nova_calls) == 2 # Two lines for Nova 643 | assert len(echo_calls) == 2 # Two lines for Echo 644 | 645 | # Test error handling 646 | mock_host_audio.return_value = False 647 | result = generate_podcast_audio(test_script, output_file) 648 | assert result is False 649 | 650 | # Test empty script 651 | result = generate_podcast_audio("", output_file) 652 | assert result is False 653 | 654 | # Test invalid script format 655 | invalid_script = "Invalid format without proper voice names" 656 | result = generate_podcast_audio(invalid_script, output_file) 657 | assert result is False 658 | 659 | # Test invalid voice name 660 | invalid_voice_script = "INVALID_VOICE: This should be skipped" 661 | result = generate_podcast_audio(invalid_voice_script, output_file) 662 | assert result is False 663 | 664 | def test_sanitize_filename(): 665 | """Test filename sanitization""" 666 | # Test URL with parameters 667 | assert sanitize_filename("watch?v=-moW9jvvMr4") == "watch_v_moW9jvvMr4" 668 | 669 | # Test full URL 670 | assert sanitize_filename("https://youtube.com/watch?v=abc123") == "https_youtube_com_watch_v_abc123" 671 | 672 | # Test special characters 673 | assert sanitize_filename("test/file:name*?") == "test_file_name_" 674 | 675 | # Test video ID only 676 | assert sanitize_filename("-moW9jvvMr4") == "_moW9jvvMr4" 677 | 678 | def test_generate_video_segments(mocker): 679 | """Test video prompt generation from podcast script""" 680 | mock_get_prompts = mocker.patch('ell.simple') 681 | mock_decorator = mocker.MagicMock() 682 | mock_function = mocker.MagicMock() 683 | mock_function.return_value = json.dumps(MOCK_VIDEO_PROMPTS) 684 | mock_decorator.return_value = mock_function 685 | mock_get_prompts.return_value = mock_decorator 686 | 687 | prompts = generate_video_segments(MOCK_PODCAST_SCRIPT) 688 | 689 | assert prompts is not None 690 | assert len(prompts) == 5 691 | assert all("Camera" in prompt for prompt in prompts) 692 | assert all(any(shot in prompt for shot in ["Establishing Shot", "Wide Shot", "Close-Up Shot", "Tracking Shot", "Aerial Shot"]) for prompt in prompts) 693 | 694 | @pytest.mark.luma 695 | def test_generate_video_segments_with_luma(mock_luma_client, temp_dir, mocker): 696 | """Test video generation with LumaAI""" 697 | mocker.patch('requests.get', return_value=mocker.MagicMock(content=b"mock video data")) 698 | mocker.patch('ytsum.luma_client', mock_luma_client) 699 | 700 | video_paths = generate_video_segments_with_luma(MOCK_VIDEO_PROMPTS, temp_dir) 701 | 702 | assert video_paths is not None 703 | assert len(video_paths) == 5 704 | assert all(Path(path).exists() for path in video_paths) 705 | 706 | @pytest.mark.timeout(30) # Timeout after 30 seconds 707 | def test_generate_video_segments_with_runway(): 708 | """Test video generation with RunwayML""" 709 | # Mock RunwayML client and responses 710 | mock_task = Mock() 711 | mock_task.id = "test_task_id" 712 | 713 | # Mock task status responses 
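    # Each prompt is walked through a PENDING -> RUNNING -> SUCCEEDED status
    # sequence via the iterator of mock responses below; once the iterator is
    # exhausted, SUCCEEDED is returned.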
714 | class MockTaskStatus: 715 | def __init__(self, status, progress=0): 716 | self.status = status 717 | self.progress = progress 718 | self.output = ["https://example.com/test.mp4"] if status == "SUCCEEDED" else None 719 | 720 | # Create sequence of status responses 721 | status_responses = iter([ 722 | MockTaskStatus("PENDING"), 723 | MockTaskStatus("RUNNING", 0.5), 724 | MockTaskStatus("SUCCEEDED"), 725 | # Second prompt responses 726 | MockTaskStatus("PENDING"), 727 | MockTaskStatus("RUNNING", 0.5), 728 | MockTaskStatus("SUCCEEDED") 729 | ]) 730 | 731 | mock_runway = Mock() 732 | mock_runway.image_to_video.create.return_value = mock_task 733 | 734 | # Set up status retrieval to return sequence of responses 735 | def mock_retrieve(*args, **kwargs): 736 | try: 737 | return next(status_responses) 738 | except StopIteration: 739 | return MockTaskStatus("SUCCEEDED") 740 | 741 | mock_runway.tasks.retrieve.side_effect = mock_retrieve 742 | 743 | with tempfile.TemporaryDirectory() as temp_dir: 744 | temp_dir = Path(temp_dir) 745 | 746 | # Create test prompts 747 | prompts = [ 748 | "Test prompt 1", 749 | "Test prompt 2" 750 | ] 751 | 752 | # Create test base images 753 | base_images = [] 754 | for i in range(len(prompts)): 755 | img_path = temp_dir / f"test_image_{i}.jpg" 756 | img_path.write_bytes(b"test image data") 757 | base_images.append(img_path) 758 | 759 | # Mock requests.get for video download 760 | mock_response = Mock() 761 | mock_response.content = b"test video data" 762 | mock_response.iter_content.return_value = [b"test video data"] 763 | mock_response.status_code = 200 764 | mock_response.raise_for_status = Mock() 765 | 766 | with patch("ytsum.runway_client", mock_runway), \ 767 | patch("requests.get", return_value=mock_response), \ 768 | patch("time.sleep", return_value=None): # Speed up by skipping sleeps 769 | 770 | # Call function with longer timeout 771 | result = generate_video_segments_with_runway( 772 | prompts=prompts, 773 | output_dir=temp_dir, 774 | base_images=base_images, 775 | timeout=30 # Longer timeout 776 | ) 777 | 778 | # Verify results 779 | assert result is not None 780 | assert len(result) == len(prompts) 781 | for path in result: 782 | assert path.exists() 783 | assert path.stat().st_size > 0 784 | 785 | # Verify API calls 786 | assert mock_runway.image_to_video.create.call_count == len(prompts) 787 | assert mock_runway.tasks.retrieve.call_count >= len(prompts) 788 | 789 | def test_generate_video_segments_with_runway_failure(): 790 | """Test handling of failed video generation""" 791 | mock_task = Mock() 792 | mock_task.id = "test_task_id" 793 | 794 | class MockTaskStatus: 795 | def __init__(self, status): 796 | self.status = status 797 | self.failure = "Test failure" 798 | self.failureCode = "TEST_ERROR" 799 | self.output = None 800 | 801 | mock_runway = Mock() 802 | mock_runway.image_to_video.create.return_value = mock_task 803 | mock_runway.tasks.retrieve.return_value = MockTaskStatus("FAILED") 804 | 805 | with tempfile.TemporaryDirectory() as temp_dir: 806 | temp_dir = Path(temp_dir) 807 | prompts = ["Test prompt"] 808 | 809 | with patch("ytsum.runway_client", mock_runway): 810 | result = generate_video_segments_with_runway( 811 | prompts=prompts, 812 | output_dir=temp_dir, 813 | timeout=5 814 | ) 815 | 816 | assert result is None 817 | assert mock_runway.tasks.retrieve.called 818 | 819 | def test_generate_video_segments_with_runway_timeout(): 820 | """Test handling of timeout during video generation""" 821 | mock_task = Mock() 822 | mock_task.id = 
"test_task_id" 823 | 824 | class MockTaskStatus: 825 | def __init__(self): 826 | self.status = "RUNNING" 827 | self.progress = 0.5 828 | 829 | mock_runway = Mock() 830 | mock_runway.image_to_video.create.return_value = mock_task 831 | mock_runway.tasks.retrieve.return_value = MockTaskStatus() 832 | 833 | with tempfile.TemporaryDirectory() as temp_dir: 834 | temp_dir = Path(temp_dir) 835 | prompts = ["Test prompt"] 836 | 837 | with patch("ytsum.runway_client", mock_runway): 838 | result = generate_video_segments_with_runway( 839 | prompts=prompts, 840 | output_dir=temp_dir, 841 | timeout=1 # Short timeout 842 | ) 843 | 844 | assert result is None 845 | assert mock_runway.tasks.cancel.called 846 | 847 | def test_combine_video_segments(temp_dir, mocker): 848 | """Test combining video segments""" 849 | video_paths = [] 850 | for i in range(5): 851 | path = temp_dir / f"segment_{i:02d}.mp4" 852 | path.write_bytes(b"mock video data") 853 | video_paths.append(path) 854 | 855 | mock_run = mocker.patch('subprocess.run') 856 | mock_run.return_value.stdout = "60.0" 857 | 858 | output_path = temp_dir / "combined.mp4" 859 | result = combine_video_segments(video_paths, 120.0, output_path) 860 | 861 | assert result is True 862 | assert mock_run.call_count >= 3 863 | 864 | def test_get_audio_duration(temp_dir, mocker): 865 | """Test getting audio duration""" 866 | audio_path = temp_dir / "test.mp3" 867 | audio_path.write_bytes(b"mock audio data") 868 | 869 | # Mock ffprobe call 870 | mock_run = mocker.patch('subprocess.run') 871 | mock_run.return_value.stdout = "180.5" 872 | 873 | duration = get_audio_duration(str(audio_path)) 874 | 875 | assert duration == 180.5 876 | mock_run.assert_called_once() 877 | 878 | def test_combine_audio_video(temp_dir, mocker): 879 | """Test combining audio and video""" 880 | # Create test files 881 | video_path = temp_dir / "video.mp4" 882 | audio_path = temp_dir / "audio.mp3" 883 | output_path = temp_dir / "final.mp4" 884 | 885 | video_path.write_bytes(b"mock video data") 886 | audio_path.write_bytes(b"mock audio data") 887 | 888 | # Mock ffmpeg probe 889 | mock_probe = mocker.patch('ffmpeg.probe') 890 | mock_probe.return_value = {'streams': [{'duration': '60.0'}]} 891 | 892 | # Mock ffmpeg run 893 | mock_run = mocker.patch('ffmpeg.run') 894 | 895 | result = combine_audio_video(str(video_path), str(audio_path), str(output_path)) 896 | 897 | assert result is True 898 | mock_run.assert_called_once() 899 | 900 | def test_sanitize_filename(): 901 | """Test filename sanitization""" 902 | test_cases = [ 903 | ("https://youtube.com/watch?v=abc123", "abc123"), 904 | ("abc123?feature=share", "abc123_feature_share"), 905 | ("test/file:name*", "test_file_name_"), 906 | ("Test File Name!", "Test_File_Name_") 907 | ] 908 | 909 | for input_name, expected in test_cases: 910 | assert sanitize_filename(input_name) == expected 911 | 912 | def test_generate_video_segments_invalid_response(mocker): 913 | """Test handling of invalid prompt generation response""" 914 | mock_get_prompts = mocker.patch('ell.simple') 915 | mock_decorator = mocker.MagicMock() 916 | mock_function = mocker.MagicMock() 917 | 918 | # Test with invalid number of prompts 919 | mock_function.return_value = json.dumps(["only one prompt"]) 920 | mock_decorator.return_value = mock_function 921 | mock_get_prompts.return_value = mock_decorator 922 | 923 | prompts = generate_video_segments(MOCK_PODCAST_SCRIPT) 924 | assert prompts is None 925 | 926 | # Test with missing shot type 927 | invalid_prompts = [ 928 | "Scene one: 
Description. Camera dolly through space. Mood.", 929 | "Scene two: More description. Camera track along path. Mood.", 930 | "Scene three: Another description. Camera orbit around subject. Mood.", 931 | "Scene four: Yet more description. Camera glide forward. Mood.", 932 | "Scene five: Final description. Camera pull back to reveal all. Mood." 933 | ] 934 | mock_function.return_value = json.dumps(invalid_prompts) 935 | prompts = generate_video_segments(MOCK_PODCAST_SCRIPT) 936 | assert prompts is None 937 | 938 | def test_generate_image_prompts(mocker): 939 | """Test conversion of video prompts to image prompts""" 940 | mock_get_prompts = mocker.patch('ell.simple') 941 | mock_decorator = mocker.MagicMock() 942 | mock_function = mocker.MagicMock() 943 | mock_function.return_value = json.dumps(MOCK_IMAGE_PROMPTS) 944 | mock_decorator.return_value = mock_function 945 | mock_get_prompts.return_value = mock_decorator 946 | 947 | prompts = generate_image_prompts(MOCK_VIDEO_PROMPTS) 948 | 949 | assert prompts is not None 950 | assert len(prompts) == len(MOCK_VIDEO_PROMPTS) 951 | assert all("masterpiece" in prompt for prompt in prompts) 952 | 953 | def test_generate_flux_images(mock_replicate_client, temp_dir): 954 | """Test Flux image generation""" 955 | image_paths = generate_flux_images(MOCK_IMAGE_PROMPTS, temp_dir) 956 | 957 | assert image_paths is not None 958 | assert len(image_paths) == len(MOCK_IMAGE_PROMPTS) 959 | assert all(Path(path).exists() for path in image_paths) 960 | 961 | def test_calculate_num_segments(): 962 | """Test segment number calculation""" 963 | # Test with Luma AI (5s segments) 964 | assert calculate_num_segments(4, "luma") == 1 # Very short 965 | assert calculate_num_segments(8, "luma") == 2 # Short 966 | assert calculate_num_segments(20, "luma") == 4 # Medium 967 | assert calculate_num_segments(50, "luma") == 5 # Long 968 | 969 | # Test with RunwayML (10s segments) 970 | assert calculate_num_segments(8, "runway") == 1 # Very short 971 | assert calculate_num_segments(15, "runway") == 2 # Short 972 | assert calculate_num_segments(40, "runway") == 4 # Medium 973 | assert calculate_num_segments(100, "runway") == 5 # Long 974 | 975 | def test_calculate_target_length(): 976 | """Test target length calculation""" 977 | short = calculate_target_length(180) # 3 minutes 978 | medium = calculate_target_length(600) # 10 minutes 979 | long = calculate_target_length(1800) # 30 minutes 980 | 981 | assert short['summary'] < medium['summary'] < long['summary'] 982 | assert short['podcast'] < medium['podcast'] < long['podcast'] 983 | 984 | def test_upload_image_to_uguu_success(temp_dir, mocker): 985 | """Test successful image upload to Uguu""" 986 | # Create test image 987 | test_image = temp_dir / "test.jpg" 988 | test_image.write_bytes(b"fake image data") 989 | 990 | # Test different response formats 991 | response_formats = [ 992 | # JSON success format 993 | ({ 994 | 'success': True, 995 | 'files': [{ 996 | 'hash': '123abc', 997 | 'name': 'test.jpg', 998 | 'url': 'https://uguu.se/files/test.jpg', 999 | 'size': 1234 1000 | }] 1001 | }, 'https://uguu.se/files/test.jpg'), 1002 | # Direct URL format (fallback) 1003 | ('https://uguu.se/files/test.jpg', 'https://uguu.se/files/test.jpg') 1004 | ] 1005 | 1006 | for response_data, expected_url in response_formats: 1007 | # Mock response 1008 | mock_response = mocker.MagicMock() 1009 | mock_response.status_code = 200 1010 | 1011 | if isinstance(response_data, dict): 1012 | mock_response.json.return_value = response_data 1013 | 
mock_response.text = json.dumps(response_data) 1014 | else: 1015 | mock_response.json.side_effect = ValueError("Not JSON") 1016 | mock_response.text = response_data 1017 | 1018 | # Mock requests.post 1019 | mock_post = mocker.patch('requests.post', return_value=mock_response) 1020 | 1021 | # Test upload 1022 | result = upload_image_to_uguu(test_image) 1023 | 1024 | # Verify result 1025 | assert result == expected_url 1026 | 1027 | # Verify API call 1028 | mock_post.assert_called_once() 1029 | args, kwargs = mock_post.call_args 1030 | assert args[0] == 'https://uguu.se/upload' 1031 | assert 'files' in kwargs 1032 | assert 'files[]' in kwargs['files'] 1033 | 1034 | # Reset mocks 1035 | mock_post.reset_mock() 1036 | 1037 | def test_upload_image_to_uguu_http_error(temp_dir, mocker): 1038 | """Test Uguu upload with HTTP error""" 1039 | # Create test image 1040 | test_image = temp_dir / "test.jpg" 1041 | test_image.write_bytes(b"fake image data") 1042 | 1043 | # Mock failed response 1044 | mock_response = mocker.MagicMock() 1045 | mock_response.status_code = 500 1046 | mocker.patch('requests.post', return_value=mock_response) 1047 | 1048 | # Test upload 1049 | result = upload_image_to_uguu(test_image) 1050 | 1051 | # Verify failure 1052 | assert result is None 1053 | 1054 | def test_upload_image_to_uguu_invalid_response(temp_dir, mocker): 1055 | """Test Uguu upload with invalid response format""" 1056 | # Create test image 1057 | test_image = temp_dir / "test.jpg" 1058 | test_image.write_bytes(b"fake image data") 1059 | 1060 | # Mock response with invalid format 1061 | mock_response = mocker.MagicMock() 1062 | mock_response.status_code = 200 1063 | mock_response.json.return_value = [] # Empty array 1064 | mocker.patch('requests.post', return_value=mock_response) 1065 | 1066 | # Test upload 1067 | result = upload_image_to_uguu(test_image) 1068 | 1069 | # Verify failure 1070 | assert result is None 1071 | 1072 | def test_upload_image_to_uguu_missing_file(temp_dir): 1073 | """Test Uguu upload with missing file""" 1074 | # Test with non-existent file 1075 | result = upload_image_to_uguu(temp_dir / "nonexistent.jpg") 1076 | 1077 | # Verify failure 1078 | assert result is None 1079 | 1080 | def test_upload_image_to_uguu_network_error(temp_dir, mocker): 1081 | """Test Uguu upload with network error""" 1082 | # Create test image 1083 | test_image = temp_dir / "test.jpg" 1084 | test_image.write_bytes(b"fake image data") 1085 | 1086 | # Mock network error 1087 | mocker.patch('requests.post', side_effect=requests.exceptions.RequestException("Network error")) 1088 | 1089 | # Test upload 1090 | result = upload_image_to_uguu(test_image) 1091 | 1092 | # Verify failure 1093 | assert result is None 1094 | 1095 | @pytest.mark.luma 1096 | def test_luma_with_uguu_image(mock_luma_client, temp_dir, mocker, mock_uguu_response): 1097 | """Test Luma AI video generation with Uguu image upload""" 1098 | # Create test image and prompt 1099 | test_image = temp_dir / "test.jpg" 1100 | test_image.write_bytes(b"fake image data") 1101 | test_prompt = "Test video prompt" 1102 | 1103 | # Mock Uguu upload 1104 | mocker.patch('requests.post', return_value=mock_uguu_response) 1105 | 1106 | # Mock video download 1107 | mocker.patch('requests.get', return_value=mocker.MagicMock(content=b"mock video data")) 1108 | 1109 | # Mock Luma client 1110 | mocker.patch('ytsum.luma_client', mock_luma_client) 1111 | 1112 | # Test video generation with image 1113 | video_paths = generate_video_segments_with_luma([test_prompt], temp_dir, 
[test_image]) 1114 | 1115 | # Verify success 1116 | assert video_paths is not None 1117 | assert len(video_paths) == 1 1118 | assert Path(video_paths[0]).exists() 1119 | 1120 | # Verify Luma API call included image URL 1121 | generation_call = mock_luma_client.generations.create.call_args 1122 | assert generation_call is not None 1123 | assert 'keyframes' in generation_call[1] 1124 | assert generation_call[1]['keyframes']['frame0']['url'] == 'https://uguu.se/files/example.jpg' 1125 | 1126 | if __name__ == "__main__": 1127 | pytest.main([__file__, "-v"]) -------------------------------------------------------------------------------- /ytsum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import os 4 | import subprocess 5 | import tempfile 6 | from pathlib import Path 7 | import json 8 | import argparse 9 | import urllib.parse 10 | import ell 11 | from anthropic import Anthropic 12 | from colorama import init, Fore, Style 13 | import replicate 14 | import time 15 | from openai import OpenAI 16 | import shutil 17 | import re 18 | from lumaai import LumaAI 19 | import requests 20 | import ffmpeg 21 | from runwayml import RunwayML 22 | from PIL import Image, ImageDraw 23 | import base64 24 | import io 25 | import math 26 | 27 | # Initialize colorama 28 | init() 29 | 30 | # Initialize Anthropic client and register with Ell 31 | api_key = os.getenv("ANTHROPIC_API_KEY") 32 | if not api_key: 33 | print_error("ANTHROPIC_API_KEY environment variable not set") 34 | sys.exit(1) 35 | 36 | claude_client = Anthropic() 37 | ell.config.register_model("claude-3-5-sonnet-20241022", claude_client) 38 | 39 | # Initialize LumaAI client 40 | luma_api_key = os.getenv("LUMAAI_API_KEY") 41 | if luma_api_key: 42 | luma_client = LumaAI(auth_token=luma_api_key) 43 | else: 44 | luma_client = None 45 | 46 | # Initialize RunwayML client 47 | runway_api_key = os.getenv("RUNWAYML_API_SECRET") 48 | if runway_api_key: 49 | runway_client = RunwayML() 50 | else: 51 | runway_client = None 52 | 53 | # Emoji constants 54 | EMOJI_DOWNLOAD = "⬇️ " 55 | EMOJI_TRANSCRIBE = "🎯 " 56 | EMOJI_SUMMARY = "📝 " 57 | EMOJI_SUCCESS = "✅ " 58 | EMOJI_ERROR = "❌ " 59 | EMOJI_SEARCH = "🔍 " 60 | EMOJI_SAVE = "💾 " 61 | EMOJI_PODCAST = "🎙️ " 62 | EMOJI_AUDIO = "🔊 " 63 | EMOJI_VIDEO = "🎥 " 64 | 65 | # Add after other constants 66 | DEFAULT_HOST_VOICES = { 67 | "host1": {"voice": "alloy", "name": "Alex"}, 68 | "host2": {"voice": "nova", "name": "Sarah"} 69 | } 70 | 71 | # Update constants 72 | AVAILABLE_VOICES = { 73 | "alloy": "Neutral voice", 74 | "echo": "Male voice", 75 | "fable": "Male voice", 76 | "onyx": "Male voice", 77 | "nova": "Female voice", 78 | "shimmer": "Female voice" 79 | } 80 | 81 | # Add after OpenAI client initialization 82 | # Create output directory if it doesn't exist 83 | OUTPUT_DIR = Path("out") 84 | OUTPUT_DIR.mkdir(parents=True, exist_ok=True) 85 | 86 | # Create subdirectories 87 | (OUTPUT_DIR / "temp_videos").mkdir(exist_ok=True) 88 | 89 | def print_step(emoji, message, color=Fore.BLUE): 90 | """Print a step with emoji and color""" 91 | print(f"{color}{emoji}{message}{Style.RESET_ALL}") 92 | 93 | def print_error(message): 94 | """Print error message in red with emoji""" 95 | print(f"{Fore.RED}{EMOJI_ERROR}{message}{Style.RESET_ALL}") 96 | 97 | def print_success(message): 98 | """Print success message in green with emoji""" 99 | print(f"{Fore.GREEN}{EMOJI_SUCCESS}{message}{Style.RESET_ALL}") 100 | 101 | def to_shorthand(text): 102 | """Convert 
text to shorthand format""" 103 | replacements = { 104 | 'you': 'u', 105 | 'are': 'r', 106 | 'see': 'c', 107 | 'for': '4', 108 | 'to': '2', 109 | 'too': '2', 110 | 'two': '2', 111 | 'four': '4', 112 | 'be': 'b', 113 | 'before': 'b4', 114 | 'great': 'gr8', 115 | 'thanks': 'thx', 116 | 'thank you': 'ty', 117 | 'because': 'bc', 118 | 'people': 'ppl', 119 | 'want': 'wnt', 120 | 'love': 'luv', 121 | 'okay': 'k', 122 | 'yes': 'y', 123 | 'no': 'n', 124 | 'please': 'plz', 125 | 'sorry': 'sry', 126 | 'see you': 'cya', 127 | 'I am': 'im', 128 | 'i am': 'im', 129 | 'good': 'gd', 130 | 'right': 'rt', 131 | 'later': 'l8r', 132 | 'have': 'hv', 133 | 'see you later': 'cul8r', 134 | 'laughing': 'lol', 135 | 'message': 'msg', 136 | 'information': 'info', 137 | 'about': 'abt', 138 | 'awesome': 'awsm', 139 | 'quickly': 'quick', 140 | 'first': '1st', 141 | 'second': '2nd', 142 | 'third': '3rd', 143 | } 144 | 145 | # Convert to lowercase first 146 | result = text.lower() 147 | 148 | # Split into words, remove articles, and rejoin 149 | words = result.split() 150 | words = [w for w in words if w not in ['the', 'a', 'an']] 151 | result = ' '.join(words) 152 | 153 | # Apply other replacements 154 | for old, new in replacements.items(): 155 | result = result.replace(old.lower(), new) 156 | 157 | return result 158 | 159 | def clean_youtube_url(url): 160 | """Clean and validate YouTube URL or video ID""" 161 | # Extract video ID from various URL formats 162 | video_id = None 163 | 164 | # Unescape URL first 165 | url = urllib.parse.unquote(url.replace('\\', '')) 166 | 167 | # Handle full URLs 168 | if url.startswith(('http://', 'https://')): 169 | try: 170 | parsed = urllib.parse.urlparse(url) 171 | if 'youtu.be' in parsed.netloc.lower(): 172 | video_id = parsed.path.strip('/') 173 | else: 174 | params = urllib.parse.parse_qs(parsed.query) 175 | video_id = params['v'][0] 176 | except: 177 | pass 178 | 179 | # Handle partial URLs 180 | elif 'youtube.com' in url.lower() or 'youtu.be' in url.lower(): 181 | try: 182 | if 'youtu.be' in url.lower(): 183 | video_id = url.split('youtu.be/')[-1].split('?')[0] 184 | else: 185 | video_id = url.split('v=')[1].split('&')[0] 186 | except: 187 | pass 188 | 189 | # Handle direct video ID 190 | else: 191 | video_id = url.strip('/') 192 | 193 | # Validate video ID format (11 characters, alphanumeric and -_) 194 | if not video_id or not re.match(r'^[A-Za-z0-9_-]{11}$', video_id): 195 | raise ValueError(f"Invalid YouTube video ID: {video_id}") 196 | 197 | # Check if video exists 198 | try: 199 | result = subprocess.run([ 200 | 'yt-dlp', 201 | '--simulate', 202 | '--no-warnings', 203 | '--no-playlist', 204 | f'https://www.youtube.com/watch?v={video_id}' 205 | ], capture_output=True, text=True) 206 | 207 | if result.returncode != 0: 208 | error_msg = result.stderr.strip() 209 | if "Video unavailable" in error_msg: 210 | raise ValueError(f"Video {video_id} is unavailable or has been removed") 211 | elif "Private video" in error_msg: 212 | raise ValueError(f"Video {video_id} is private") 213 | else: 214 | raise ValueError(f"Error accessing video: {error_msg}") 215 | except subprocess.CalledProcessError: 216 | raise ValueError(f"Could not verify video availability") 217 | 218 | return f"https://www.youtube.com/watch?v={video_id}" 219 | 220 | def download_video(url, output_path): 221 | """Download audio using yt-dlp""" 222 | try: 223 | clean_url = clean_youtube_url(url) 224 | print_step(EMOJI_DOWNLOAD, "Downloading audio...") 225 | 226 | subprocess.run([ 227 | 'yt-dlp', 228 | '--output', 
output_path, 229 | '--format', 'ba[ext=m4a]', 230 | '--extract-audio', 231 | '--force-overwrites', 232 | clean_url 233 | ], check=True) 234 | return True 235 | except subprocess.CalledProcessError: 236 | print_error("Failed to download audio") 237 | return False 238 | 239 | def get_language_code(language_name: str) -> str: 240 | """Convert language name to ISO 639-1 code using Claude""" 241 | 242 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.0, max_tokens=2) 243 | def get_code(lang: str) -> str: 244 | """You are an expert in language codes. Return only the ISO 639-1 code (2 letters) for the given language name. 245 | For example: 246 | - English -> en 247 | - Russian -> ru 248 | - Spanish -> es 249 | - Chinese -> zh 250 | - Japanese -> ja 251 | If unsure, return 'en' as fallback.""" 252 | return f"Convert this language name to ISO 639-1 code: {lang}. No \`\`\` or \`\`\`python, no intro, no commentaries, only the code." 253 | 254 | try: 255 | code = get_code(language_name).strip().lower() 256 | # Validate it's a 2-letter code 257 | if len(code) == 2 and code.isalpha(): 258 | return code 259 | return 'en' 260 | except: 261 | return 'en' 262 | 263 | def get_youtube_subtitles(url, output_path, language="en"): 264 | """Try to download subtitles from YouTube using yt-dlp""" 265 | try: 266 | # Convert language name to code 267 | language_code = get_language_code(language) 268 | print_step(EMOJI_SEARCH, f"Searching for YouTube subtitles in {language} ({language_code})...") 269 | clean_url = clean_youtube_url(url) 270 | 271 | # Try to download subtitles directly with basic command 272 | result = subprocess.run([ 273 | 'yt-dlp', 274 | '--write-subs', 275 | '--sub-langs', language_code, 276 | '--skip-download', 277 | clean_url 278 | ], capture_output=True, text=True) 279 | 280 | # Look for the downloaded subtitle file in current directory 281 | if "Writing video subtitles to:" in result.stdout: 282 | # Extract the filename from yt-dlp output 283 | for line in result.stdout.splitlines(): 284 | if "Writing video subtitles to:" in line: 285 | subtitle_file = line.split("Writing video subtitles to:", 1)[1].strip() 286 | if os.path.exists(subtitle_file): 287 | print_success(f"Found subtitles!") 288 | # Convert VTT to plain text 289 | text = convert_vtt_to_text(subtitle_file) 290 | txt_file = subtitle_file.replace('.vtt', '.txt') 291 | with open(txt_file, 'w', encoding='utf-8') as f: 292 | f.write(text) 293 | return txt_file 294 | 295 | print_step(EMOJI_SEARCH, "No subtitles found, will transcribe audio...") 296 | return None 297 | 298 | except Exception as e: 299 | print_error(f"Failed to get subtitles: {e}") 300 | return None 301 | 302 | def convert_vtt_to_text(vtt_file): 303 | """Convert VTT subtitles to plain text""" 304 | text = [] 305 | with open(vtt_file, 'r', encoding='utf-8') as f: 306 | lines = f.readlines() 307 | 308 | # Skip VTT header 309 | start = 0 310 | for i, line in enumerate(lines): 311 | if line.strip() == "WEBVTT": 312 | start = i + 1 313 | break 314 | 315 | # Process subtitle content 316 | for line in lines[start:]: 317 | # Skip timing lines and empty lines 318 | if '-->' in line or not line.strip(): 319 | continue 320 | # Add non-empty lines to text 321 | if line.strip(): 322 | text.append(line.strip()) 323 | 324 | return ' '.join(text) 325 | 326 | def transcribe_with_fast_whisper(video_path): 327 | """Transcribe video using Faster Whisper""" 328 | try: 329 | from faster_whisper import WhisperModel 330 | 331 | print_step(EMOJI_TRANSCRIBE, "Transcribing with Fast 
Whisper...") 332 | model = WhisperModel("base", device="auto", compute_type="auto") 333 | 334 | segments, _ = model.transcribe(video_path) 335 | transcript = " ".join([segment.text for segment in segments]) 336 | 337 | transcript_path = str(Path(video_path).with_suffix('.txt')) 338 | with open(transcript_path, 'w', encoding='utf-8') as f: 339 | f.write(transcript) 340 | 341 | return True 342 | 343 | except ImportError: 344 | print_error("Faster Whisper not found. Please install it with:") 345 | print(f"{Fore.YELLOW}pip install faster-whisper{Style.RESET_ALL}") 346 | return False 347 | except Exception as e: 348 | print_error(f"Fast transcription error: {e}") 349 | return False 350 | 351 | def transcribe_with_replicate(video_path, language=None): 352 | """Transcribe video using Replicate's Incredibly Fast Whisper""" 353 | try: 354 | print_step(EMOJI_TRANSCRIBE, "Transcribing with Incredibly Fast Whisper...") 355 | 356 | # Convert audio to MP3 format 357 | mp3_path = convert_audio_format(video_path, 'mp3') 358 | if not mp3_path: 359 | print_error("Failed to convert audio to MP3") 360 | return False 361 | 362 | # Prepare input parameters 363 | input_params = { 364 | "audio": open(mp3_path, 'rb'), # Send file directly 365 | "batch_size": 64, 366 | } 367 | 368 | if language: 369 | input_params["language"] = language.lower() 370 | 371 | # Run transcription 372 | output = replicate.run( 373 | "vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c", 374 | input=input_params 375 | ) 376 | 377 | if not output or "text" not in output: 378 | print_error("Invalid response from Replicate") 379 | return False 380 | 381 | # Write transcript to file 382 | transcript_path = os.path.splitext(video_path)[0] + '.txt' 383 | with open(transcript_path, 'w', encoding='utf-8') as f: 384 | f.write(output["text"]) 385 | 386 | return True 387 | 388 | except Exception as e: 389 | print_error(f"Replicate transcription error: {e}") 390 | return False 391 | 392 | def split_audio_into_chunks(input_path, chunk_size_mb=20): 393 | """Split audio file into chunks under specified size""" 394 | try: 395 | # Get file size in MB 396 | file_size = os.path.getsize(input_path) / (1024 * 1024) 397 | if file_size <= chunk_size_mb: 398 | return [input_path] 399 | 400 | # Calculate duration of each chunk 401 | duration_info = subprocess.run([ 402 | 'ffprobe', 403 | '-v', 'error', 404 | '-show_entries', 'format=duration', 405 | '-of', 'default=noprint_wrappers=1:nokey=1', 406 | input_path 407 | ], capture_output=True, text=True) 408 | 409 | total_duration = float(duration_info.stdout.strip()) # Strip whitespace 410 | if total_duration <= 0: 411 | print_error("Invalid audio duration") 412 | return None 413 | 414 | # Calculate chunk duration (ensure it's at least 1 second) 415 | chunk_duration = max(1, int((chunk_size_mb / file_size) * total_duration)) 416 | 417 | # Create chunks directory 418 | chunks_dir = os.path.join(os.path.dirname(input_path), "chunks") 419 | os.makedirs(chunks_dir, exist_ok=True) 420 | 421 | chunk_paths = [] 422 | for i in range(0, int(total_duration), chunk_duration): 423 | chunk_path = os.path.join(chunks_dir, f"chunk_{i}.mp3") 424 | subprocess.run([ 425 | 'ffmpeg', 426 | '-i', input_path, 427 | '-y', # Overwrite output 428 | '-ss', str(i), # Start time 429 | '-t', str(chunk_duration), # Duration 430 | '-acodec', 'libmp3lame', 431 | '-ar', '44100', 432 | '-ac', '2', 433 | '-b:a', '192k', 434 | chunk_path 435 | ], check=True, capture_output=True) 436 | 
chunk_paths.append(chunk_path) 437 | 438 | return chunk_paths 439 | 440 | except Exception as e: 441 | print_error(f"Error splitting audio: {e}") 442 | return None 443 | 444 | def transcribe_with_openai_whisper(video_path): 445 | """Transcribe video using OpenAI's Whisper API""" 446 | try: 447 | from openai import OpenAI 448 | 449 | # Check for API key 450 | if not os.getenv("OPENAI_API_KEY"): 451 | print_error("OPENAI_API_KEY environment variable not set") 452 | return False 453 | 454 | print_step(EMOJI_TRANSCRIBE, "Transcribing with OpenAI Whisper...") 455 | client = OpenAI() 456 | 457 | # Check if input format is supported 458 | supported_formats = {'.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm'} 459 | input_ext = Path(video_path).suffix.lower() 460 | 461 | # Convert only if needed 462 | audio_path = video_path 463 | if input_ext not in supported_formats: 464 | print_step(EMOJI_TRANSCRIBE, "Converting to supported format...") 465 | audio_path = convert_audio_format(video_path, 'mp3', bitrate='32k', mono=True) 466 | if not audio_path: 467 | return False 468 | 469 | # Check file size (25MB limit) 470 | MAX_SIZE_MB = 25 471 | file_size_mb = os.path.getsize(audio_path) / (1024 * 1024) 472 | 473 | if file_size_mb > MAX_SIZE_MB: 474 | print_step(EMOJI_TRANSCRIBE, f"File too large ({file_size_mb:.1f}MB), optimizing...") 475 | 476 | # Try aggressive compression first 477 | compressed_path = convert_audio_format(audio_path, 'mp3', bitrate='32k', mono=True) 478 | if not compressed_path: 479 | return False 480 | 481 | # Check if compression was enough 482 | compressed_size_mb = os.path.getsize(compressed_path) / (1024 * 1024) 483 | if compressed_size_mb > MAX_SIZE_MB: 484 | print_step(EMOJI_TRANSCRIBE, "Still too large, splitting into chunks...") 485 | chunk_paths = split_audio_into_chunks(compressed_path, chunk_size_mb=20) 486 | else: 487 | chunk_paths = [compressed_path] 488 | else: 489 | chunk_paths = [audio_path] 490 | 491 | if not chunk_paths: 492 | return False 493 | 494 | # Transcribe each chunk 495 | transcripts = [] 496 | for chunk_path in chunk_paths: 497 | # Verify chunk size 498 | chunk_size_mb = os.path.getsize(chunk_path) / (1024 * 1024) 499 | if chunk_size_mb > MAX_SIZE_MB: 500 | print_error(f"Chunk too large: {chunk_size_mb:.1f}MB") 501 | continue 502 | 503 | with open(chunk_path, "rb") as audio_file: 504 | transcription = client.audio.transcriptions.create( 505 | model="whisper-1", 506 | file=audio_file 507 | ) 508 | transcripts.append(transcription.text) 509 | 510 | if not transcripts: 511 | print_error("No successful transcriptions") 512 | return False 513 | 514 | # Combine transcripts 515 | full_transcript = " ".join(transcripts) 516 | 517 | # Write transcript to file 518 | transcript_path = os.path.splitext(video_path)[0] + '.txt' 519 | with open(transcript_path, 'w', encoding='utf-8') as f: 520 | f.write(full_transcript) 521 | 522 | # Clean up chunks if we created them 523 | if len(chunk_paths) > 1: 524 | chunks_dir = os.path.dirname(chunk_paths[0]) 525 | for chunk in chunk_paths: 526 | os.remove(chunk) 527 | os.rmdir(chunks_dir) 528 | 529 | return True 530 | 531 | except ImportError: 532 | print_error("OpenAI package not found. 
Please install it with:") 533 | print(f"{Fore.YELLOW}pip install openai{Style.RESET_ALL}") 534 | return False 535 | except Exception as e: 536 | print_error(f"OpenAI Whisper error: {e}") 537 | return False 538 | 539 | def transcribe_video(video_path, use_fast_whisper=False, use_replicate=False, language=None): 540 | """Transcribe video using chosen transcription method""" 541 | if use_replicate: 542 | return transcribe_with_replicate(video_path, language) 543 | elif use_fast_whisper: 544 | return transcribe_with_fast_whisper(video_path) 545 | else: 546 | return transcribe_with_openai_whisper(video_path) # Default to OpenAI API 547 | 548 | def summarize_with_claude(transcript, metadata="", language="english"): 549 | """Generate summary using Claude""" 550 | # Get video duration from metadata or use default 551 | try: 552 | duration = float(re.search(r'Duration: (\d+\.\d+)', metadata).group(1)) 553 | except: 554 | duration = 600 # Default to 10 minutes 555 | 556 | targets = calculate_target_length(duration) 557 | 558 | # Read the prompt template 559 | try: 560 | with open('prompt.txt', 'r', encoding='utf-8') as f: 561 | prompt_template = f.read() 562 | except Exception as e: 563 | print_error(f"Error reading prompt template: {e}") 564 | return None 565 | 566 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=8192) 567 | def get_summary(content: str, target_words: int) -> str: 568 | # Format the prompt template with the target language 569 | formatted_prompt = prompt_template.format(language=language) 570 | 571 | return f"""{formatted_prompt} 572 | 573 | Target length: {target_words} words. 574 | 575 | Transcript: 576 | {content}""" 577 | 578 | try: 579 | return get_summary(f"{transcript}\n\nMetadata:\n{metadata}", targets['summary']) 580 | except Exception as e: 581 | print_error(f"Error generating summary: {e}") 582 | return None 583 | 584 | def get_video_metadata(url): 585 | """Get video metadata using yt-dlp""" 586 | try: 587 | clean_url = clean_youtube_url(url) 588 | print_step(EMOJI_SEARCH, "Fetching video metadata...") 589 | 590 | result = subprocess.run([ 591 | 'yt-dlp', 592 | '--dump-json', 593 | '--no-playlist', 594 | clean_url 595 | ], check=True, capture_output=True, text=True) 596 | 597 | metadata = json.loads(result.stdout) 598 | header_parts = ["---"] 599 | 600 | # Add metadata fields only if they exist 601 | if title := metadata.get('title'): 602 | header_parts.append(f"Title: {title}") 603 | 604 | if channel := metadata.get('channel'): 605 | header_parts.append(f"Channel: {channel}") 606 | 607 | if upload_date := metadata.get('upload_date'): 608 | header_parts.append(f"Upload Date: {upload_date}") 609 | 610 | if duration := metadata.get('duration_string'): 611 | header_parts.append(f"Duration: {duration}") 612 | 613 | if views := metadata.get('view_count'): 614 | header_parts.append(f"Views: {views:,}") 615 | 616 | if description := metadata.get('description'): 617 | # Process description with Ell 618 | processed = process_metadata_description(description) 619 | header_parts.append(f"Description: {processed}") 620 | 621 | if tags := metadata.get('tags'): 622 | # Process tags with Ell 623 | processed_tags = process_metadata_description(' '.join(tags)) 624 | header_parts.append(f"Tags: {processed_tags}") 625 | 626 | header_parts.extend(["---", ""]) 627 | 628 | return '\n'.join(header_parts) 629 | except Exception as e: 630 | print_error(f"Failed to fetch metadata: {e}") 631 | return "" 632 | 633 | def convert_audio_format(input_path, 
output_format='mp3', bitrate='192k', mono=False): 634 | """Convert audio to specified format using FFmpeg""" 635 | try: 636 | print_step(EMOJI_TRANSCRIBE, f"Converting audio to {output_format} ({bitrate}{'mono' if mono else ''})...") 637 | output_path = str(Path(input_path).with_suffix(f'.{output_format}')) 638 | 639 | # Build FFmpeg command 640 | cmd = [ 641 | 'ffmpeg', 642 | '-i', input_path, 643 | '-y', # Overwrite output file if exists 644 | '-vn', # No video 645 | '-acodec', 'libmp3lame' if output_format == 'mp3' else output_format, 646 | '-ar', '44100', # Sample rate 647 | '-ac', '1' if mono else '2', # Mono/Stereo 648 | '-b:a', bitrate, # Bitrate 649 | output_path 650 | ] 651 | 652 | # Run FFmpeg with error output 653 | result = subprocess.run(cmd, check=True, capture_output=True, text=True) 654 | 655 | # Verify file exists and is not empty 656 | if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: 657 | print_error("FFmpeg output file is missing or empty") 658 | return None 659 | 660 | return output_path 661 | 662 | except subprocess.CalledProcessError as e: 663 | print_error(f"FFmpeg conversion failed: {e.stderr}") 664 | return None 665 | except Exception as e: 666 | print_error(f"Audio conversion error: {e}") 667 | return None 668 | 669 | def process_metadata_description(metadata): 670 | """Process metadata description using Ell""" 671 | 672 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1000) 673 | def summarize_metadata(content: str) -> str: 674 | """You are a metadata processor that creates concise video descriptions. 675 | Rules: 676 | 1. Description must be a single line, max 3 semicolon-separated points 677 | 2. Tags must be grouped by theme with parentheses, max 5 groups 678 | 3. Remove all URLs, social media links, and promotional text 679 | 4. Focus only on plot/content-relevant information 680 | 5. Use semicolons to separate multiple plot points 681 | 6. Group related tags inside parentheses 682 | 7. Exclude generic/redundant tags""" 683 | 684 | return f"""Process this video metadata into a concise format: 685 | 1. Extract main plot points (max 3, separated by semicolons) 686 | 2. Group related tags (max 5 groups, use parentheses) 687 | 688 | Metadata: 689 | {content} 690 | 691 | Format output as: 692 | Description: [plot point 1]; [plot point 2]; [plot point 3] 693 | Tags: [group1], [group2 (item1, item2)], [group3], [group4 (items...)]""" 694 | 695 | try: 696 | result = summarize_metadata(metadata) 697 | return result 698 | except Exception as e: 699 | print_error(f"Error processing metadata: {e}") 700 | return metadata 701 | 702 | def convert_to_podcast_script(summary, language="english", duration=None): 703 | """Convert summary to podcast script using Claude""" 704 | if duration is None: 705 | # Estimate duration from summary length 706 | duration = len(summary.split()) * 0.5 # rough estimate: 0.5 seconds per word 707 | 708 | targets = calculate_target_length(duration) 709 | 710 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=4096) 711 | def get_podcast(content: str, voice1: str, voice2: str, target_lang: str) -> str: 712 | return f"""Convert this summary into an engaging podcast script with two hosts. 713 | Target length: {targets['podcast']} words total. 714 | Output language: {target_lang} 715 | Use these voice names for the hosts: {voice1.upper()} and {voice2.upper()}. 716 | 717 | Rules: 718 | 1. 
Format each line as: "VOICE_NAME: " 719 | Example: "{voice1.upper()}: That's an interesting point!" 720 | 2. Use only {voice1.upper()} and {voice2.upper()} consistently 721 | 3. Make it conversational but informative 722 | 4. Keep all dialogue in {target_lang} language 723 | 5. Include brief reactions and interactions between hosts 724 | 6. Start with one host introducing the topic 725 | 7. End with the other host wrapping up 726 | 8. Keep the original insights and information 727 | 9. Avoid meta-commentary or introductions 728 | 10. Do NOT use typical AI buzzwords: dive in, delve into, fascinating,etc. 729 | 10. Come up with original beginning (use ending for that). Do NOT start with "Today we are..." 730 | 731 | Available voices: 732 | {json.dumps(AVAILABLE_VOICES, indent=2)} 733 | 734 | Summary to convert: 735 | {content}""" 736 | 737 | try: 738 | # Randomly select two different voices 739 | import random 740 | available_voices = list(AVAILABLE_VOICES.keys()) 741 | host1_voice = random.choice(available_voices) 742 | available_voices.remove(host1_voice) 743 | host2_voice = random.choice(available_voices) 744 | 745 | return get_podcast(summary, host1_voice, host2_voice, language) 746 | except Exception as e: 747 | print_error(f"Error converting to podcast script: {e}") 748 | return None 749 | 750 | def generate_host_audio(text, host_config, output_path): 751 | """Generate audio for a specific host""" 752 | try: 753 | if not os.getenv("OPENAI_API_KEY"): 754 | print_error("OPENAI_API_KEY environment variable not set") 755 | return False 756 | 757 | client = OpenAI() 758 | print_step(EMOJI_AUDIO, f"Generating audio for {host_config['name']}...") 759 | 760 | with client.audio.speech.with_streaming_response.create( 761 | model="tts-1", 762 | voice=host_config['voice'], 763 | input=text 764 | ) as response: 765 | response.stream_to_file(output_path) 766 | return True 767 | except Exception as e: 768 | print_error(f"Error generating audio: {e}") 769 | return False 770 | 771 | def combine_audio_files(audio_files, output_file): 772 | """Combine multiple audio files with crossfade""" 773 | try: 774 | print_step(EMOJI_AUDIO, "Combining audio files...") 775 | 776 | if len(audio_files) < 2: 777 | print_error("Need at least two audio files to combine.") 778 | return False 779 | 780 | # Ensure output directory exists 781 | output_file = Path(output_file) 782 | output_file.parent.mkdir(parents=True, exist_ok=True) 783 | 784 | # Build filter complex for crossfade 785 | filter_parts = [] 786 | n = len(audio_files) 787 | 788 | # Label all inputs 789 | labels = [f'[{i}:a]' for i in range(n)] 790 | 791 | # Build the filter chain 792 | current_label = 0 793 | next_tmp = n # Start temporary labels after input labels 794 | 795 | for i in range(n-1): 796 | if i == 0: 797 | # First merge 798 | filter_parts.append(f'{labels[i]}{labels[i+1]}acrossfade=d=0.5:c1=tri:c2=tri[tmp{next_tmp}]') 799 | current_label = next_tmp 800 | next_tmp += 1 801 | else: 802 | # Merge result with next input 803 | filter_parts.append(f'[tmp{current_label}]{labels[i+1]}acrossfade=d=0.5:c1=tri:c2=tri[tmp{next_tmp}]') 804 | current_label = next_tmp 805 | next_tmp += 1 806 | 807 | # Create input arguments 808 | inputs = [] 809 | for audio_file in audio_files: 810 | inputs.extend(['-i', str(audio_file)]) 811 | 812 | # Build final command 813 | cmd = [ 814 | 'ffmpeg', '-y', 815 | *inputs, 816 | '-filter_complex', 817 | ';'.join(filter_parts), 818 | '-map', f'[tmp{current_label}]', 819 | '-ac', '2', # Convert to stereo 820 | '-ar', '44100', # 
Standard sample rate 821 | str(output_file) 822 | ] 823 | 824 | # Run FFmpeg 825 | result = subprocess.run(cmd, capture_output=True, text=True) 826 | if result.returncode != 0: 827 | print_error(f"FFmpeg error: {result.stderr}") 828 | return False 829 | 830 | return True 831 | 832 | except Exception as e: 833 | print_error(f"Error combining audio files: {e}") 834 | return False 835 | 836 | def generate_podcast_audio(script, output_file): 837 | """Generate podcast audio with detected voices""" 838 | temp_files = [] 839 | voice_configs = {} # Will store voice configs as we discover them 840 | 841 | try: 842 | with tempfile.TemporaryDirectory() as temp_dir: 843 | # Process each line of the script 844 | for i, line in enumerate(script.split('\n')): 845 | if not line.strip(): 846 | continue 847 | 848 | # Parse voice and text 849 | try: 850 | voice_name, text = line.split(':', 1) 851 | voice_name = voice_name.strip().lower() 852 | text = text.strip() 853 | except ValueError: 854 | continue 855 | 856 | # Skip if not a valid voice 857 | if voice_name not in AVAILABLE_VOICES: 858 | continue 859 | 860 | # Create voice config if not seen before 861 | if voice_name not in voice_configs: 862 | voice_configs[voice_name] = { 863 | "voice": voice_name, 864 | "name": voice_name.capitalize() 865 | } 866 | 867 | # Generate audio for this line 868 | temp_file = os.path.join(temp_dir, f"part_{i:03d}.mp3") 869 | if generate_host_audio(text, voice_configs[voice_name], temp_file): 870 | temp_files.append(temp_file) 871 | 872 | # Combine all audio files 873 | if temp_files: 874 | return combine_audio_files(temp_files, output_file) 875 | 876 | return False 877 | except Exception as e: 878 | print_error(f"Error generating podcast: {e}") 879 | return False 880 | 881 | def sanitize_filename(filename): 882 | """Convert URL or video ID to safe filename""" 883 | # Extract video ID from URL if present 884 | if 'youtube.com' in filename or 'youtu.be' in filename: 885 | try: 886 | if 'youtu.be' in filename: 887 | video_id = filename.split('/')[-1].split('?')[0] 888 | else: 889 | query = urllib.parse.urlparse(filename).query 890 | params = urllib.parse.parse_qs(query) 891 | video_id = params['v'][0] 892 | return video_id 893 | except: 894 | pass 895 | 896 | # Handle query parameters 897 | if '?' 
in filename: 898 | parts = filename.split('?') 899 | filename = parts[0] 900 | params = parts[1].replace('=', '_').replace('&', '_') 901 | filename = f"{filename}_{params}" 902 | 903 | # Count trailing special characters 904 | trailing_specials = len(filename) - len(filename.rstrip(r'\\/:*"<>|!')) 905 | 906 | # First replace special characters with underscores 907 | clean = re.sub(r'[\\/:*"<>|]', '_', filename) # Replace invalid chars with underscore 908 | 909 | # Replace spaces and other non-alphanumeric chars (except dashes) with underscore 910 | clean = re.sub(r'[^\w\-]', '_', clean) 911 | 912 | # Replace multiple consecutive underscores with a single one 913 | clean = re.sub(r'_+', '_', clean) 914 | 915 | # Remove leading underscores 916 | clean = clean.lstrip('_') 917 | 918 | # Add single trailing underscore if original had special chars at the end 919 | if trailing_specials > 0: 920 | clean = clean.rstrip('_') + '_' 921 | 922 | # Preserve casing from original filename 923 | if filename.isupper(): 924 | clean = clean.upper() 925 | elif not filename.islower(): # If mixed case or title case 926 | parts = clean.split('_') 927 | clean = '_'.join(p.capitalize() for p in parts) 928 | 929 | return clean 930 | 931 | def generate_video_segments(podcast_script, num_segments=5, seed=42): 932 | """Generate video prompts that match podcast content and flow""" 933 | 934 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=2048) 935 | def get_video_prompts(script: str, num: int) -> str: 936 | return f"""Create {num} detailed video prompts that directly visualize the key moments from this podcast conversation. 937 | Each prompt must be under 500 characters long and create a clear, engaging scene. 938 | 939 | Podcast Script: 940 | {script} 941 | 942 | Guidelines for Each Prompt: 943 | 1. Scene Content: 944 | - Focus on the specific topic being discussed 945 | - Show real environments and objects 946 | - Include relevant details mentioned by hosts 947 | - Keep descriptions concise but clear 948 | 949 | 2. Visual Style: 950 | - Professional documentary style 951 | - Clean, high-quality visuals 952 | - Natural lighting 953 | - Clear focal points 954 | 955 | 3. Required Structure (keep under 500 chars): 956 | "A [brief location] shows [main subject/action]. [Supporting details]. [Human elements] [interact with] [key concept]. [Lighting] highlights [focus]. [Camera angle]." 957 | 958 | 4. Key Points: 959 | - Be specific but concise 960 | - Use concrete imagery 961 | - Match the conversation 962 | - Stay under length limit 963 | 964 | Instructions: 965 | 1. Read the section 966 | 2. Identify key concept 967 | 3. Create concise scene 968 | 4. Check character count 969 | 5. Trim if needed 970 | 971 | Return a properly formatted JSON array of strings like this: 972 | [ 973 | "First scene (under 500 chars)...", 974 | "Second scene (under 500 chars)..." 
975 | ] 976 | 977 | Important: Use double quotes and ensure valid JSON format.""" 978 | 979 | try: 980 | # Generate prompts and ensure valid JSON 981 | response = get_video_prompts(podcast_script, num_segments) 982 | 983 | # Parse JSON 984 | prompts = json.loads(response) 985 | 986 | if not isinstance(prompts, list) or len(prompts) != num_segments: 987 | raise ValueError(f"Invalid prompt format - must be array of exactly {num_segments} strings") 988 | 989 | # Validate and truncate prompts 990 | MAX_LENGTH = 500 # Keep some buffer below 512 991 | processed_prompts = [] 992 | 993 | for i, prompt in enumerate(prompts, 1): 994 | if not isinstance(prompt, str): 995 | raise ValueError(f"Prompt {i} must be a string") 996 | 997 | # Ensure minimum detail 998 | if len(prompt.split()) < 20: 999 | raise ValueError(f"Prompt {i} is too short - needs more detail") 1000 | 1001 | # Truncate if too long 1002 | if len(prompt) > MAX_LENGTH: 1003 | # Find last complete sentence that fits 1004 | sentences = prompt.split('.') 1005 | truncated = '' 1006 | for sentence in sentences: 1007 | if len(truncated + sentence + '.') <= MAX_LENGTH: 1008 | truncated += sentence + '.' 1009 | else: 1010 | break 1011 | prompt = truncated.strip() 1012 | 1013 | processed_prompts.append(prompt) 1014 | 1015 | return processed_prompts 1016 | 1017 | except json.JSONDecodeError as e: 1018 | print_error(f"Error parsing JSON response: {e}") 1019 | print_error(f"Raw response: {response[:200]}...") 1020 | return None 1021 | except Exception as e: 1022 | print_error(f"Error generating video prompts: {e}") 1023 | return None 1024 | 1025 | def upload_image_to_uguu(image_path, max_retries=3): 1026 | """Upload image to uguu.se and get URL""" 1027 | try: 1028 | url = "https://uguu.se/upload" 1029 | 1030 | # Prepare the file with proper format 1031 | with open(image_path, 'rb') as f: 1032 | files = { 1033 | 'files[]': ( 1034 | Path(image_path).name, 1035 | f, 1036 | 'image/jpeg' 1037 | ) 1038 | } 1039 | 1040 | # Try upload with retries 1041 | for attempt in range(max_retries): 1042 | try: 1043 | response = requests.post( 1044 | url, 1045 | files=files, 1046 | timeout=30 1047 | ) 1048 | 1049 | if response.status_code != 200: 1050 | print_error(f"Upload failed with status {response.status_code}") 1051 | if attempt < max_retries - 1: 1052 | time.sleep(1) 1053 | continue 1054 | return None 1055 | 1056 | # Parse JSON response 1057 | try: 1058 | result = response.json() 1059 | if (result.get('success') and 1060 | isinstance(result.get('files'), list) and 1061 | result['files'] and 1062 | 'url' in result['files'][0]): 1063 | return result['files'][0]['url'] 1064 | except (ValueError, KeyError, AttributeError): 1065 | # If JSON parsing fails or format is unexpected, try text response 1066 | text = response.text.strip() 1067 | if text.startswith('http'): 1068 | return text 1069 | 1070 | print_error(f"Invalid response format: {response.text[:100]}") 1071 | if attempt < max_retries - 1: 1072 | time.sleep(1) 1073 | continue 1074 | return None 1075 | 1076 | except requests.exceptions.RequestException as e: 1077 | print_error(f"Upload attempt {attempt + 1} failed: {e}") 1078 | if attempt < max_retries - 1: 1079 | time.sleep(1) 1080 | continue 1081 | return None 1082 | 1083 | return None 1084 | 1085 | except Exception as e: 1086 | print_error(f"Error uploading image: {e}") 1087 | return None 1088 | 1089 | def generate_video_segments_with_luma(prompts, output_dir, base_images=None, podcast_script=None): 1090 | """Generate video segments using LumaAI with 
optional base images""" 1091 | if not luma_client: 1092 | print_error("LUMA_API_KEY environment variable not set") 1093 | return None 1094 | 1095 | video_paths = [] 1096 | for i, prompt in enumerate(prompts): 1097 | try: 1098 | print_step(EMOJI_VIDEO, f"Generating video segment {i+1}/{len(prompts)}...") 1099 | 1100 | # Set up generation parameters 1101 | generation_params = { 1102 | "prompt": prompt, 1103 | "aspect_ratio": "16:9", 1104 | "loop": False 1105 | } 1106 | 1107 | # Add base image if available 1108 | if base_images and i < len(base_images): 1109 | image_url = upload_image_to_uguu(base_images[i]) 1110 | if not image_url: 1111 | print_error(f"Failed to upload image {i+1}, continuing without image") 1112 | else: 1113 | generation_params["keyframes"] = { 1114 | "frame0": { 1115 | "type": "image", 1116 | "url": image_url 1117 | } 1118 | } 1119 | 1120 | # Try generation with retries and prompt regeneration 1121 | max_retries = 3 1122 | max_prompt_retries = 3 1123 | generation = None 1124 | 1125 | for prompt_attempt in range(max_prompt_retries): 1126 | try: 1127 | # Create generation with retries 1128 | for attempt in range(max_retries): 1129 | try: 1130 | generation = luma_client.generations.create(**generation_params) 1131 | break 1132 | except Exception as e: 1133 | if attempt < max_retries - 1: 1134 | print_error(f"Generation attempt {attempt + 1} failed: {e}, retrying...") 1135 | time.sleep(2) 1136 | else: 1137 | raise 1138 | 1139 | if not generation: 1140 | raise Exception("Failed to create generation after retries") 1141 | 1142 | # Poll for completion with timeout 1143 | start_time = time.time() 1144 | timeout = 300 1145 | completed = False 1146 | moderation_failed = False 1147 | 1148 | while not completed and time.time() - start_time < timeout: 1149 | try: 1150 | generation = luma_client.generations.get(id=generation.id) 1151 | 1152 | if generation.state == "completed": 1153 | completed = True 1154 | break 1155 | elif generation.state == "failed": 1156 | error_msg = getattr(generation, 'failure_reason', 'Unknown error') 1157 | if "moderation failed" in error_msg.lower(): 1158 | moderation_failed = True 1159 | break 1160 | # Add regeneration for any failure 1161 | if prompt_attempt < max_prompt_retries - 1: 1162 | print_error(f"Generation failed: {error_msg}, regenerating prompt...") 1163 | new_prompts = generate_video_segments(podcast_script, num_segments=1) 1164 | if new_prompts and len(new_prompts) > 0: 1165 | generation_params["prompt"] = new_prompts[0] 1166 | break 1167 | raise Exception(f"Video generation failed: {error_msg}") 1168 | elif generation.state == "canceled": 1169 | raise Exception("Video generation was cancelled") 1170 | else: 1171 | print_step(EMOJI_VIDEO, f"Generating segment {i+1}...", color=Fore.YELLOW) 1172 | time.sleep(3) 1173 | 1174 | except Exception as e: 1175 | print_error(f"Error checking generation status: {e}") 1176 | time.sleep(3) 1177 | 1178 | if moderation_failed: 1179 | if prompt_attempt < max_prompt_retries - 1: 1180 | print_error("Moderation failed, regenerating prompt...") 1181 | # Regenerate prompt for this segment 1182 | new_prompts = generate_video_segments(podcast_script, num_segments=1) 1183 | if new_prompts and len(new_prompts) > 0: 1184 | generation_params["prompt"] = new_prompts[0] 1185 | continue 1186 | raise Exception("Failed to generate acceptable prompt after retries") 1187 | 1188 | if not completed: 1189 | raise Exception(f"Generation timed out after {timeout} seconds") 1190 | 1191 | # If we get here, generation was successful 
1192 | break 1193 | 1194 | except Exception as e: 1195 | if prompt_attempt < max_prompt_retries - 1: 1196 | print_error(f"Prompt attempt {prompt_attempt + 1} failed: {e}, trying new prompt...") 1197 | continue 1198 | raise 1199 | 1200 | # Download video with retries 1201 | max_download_retries = 3 1202 | for attempt in range(max_download_retries): 1203 | try: 1204 | output_path = output_dir / f"segment_{i:02d}.mp4" 1205 | response = requests.get(generation.assets.video, stream=True, timeout=30) 1206 | response.raise_for_status() 1207 | 1208 | with open(output_path, 'wb') as file: 1209 | for chunk in response.iter_content(chunk_size=8192): 1210 | file.write(chunk) 1211 | 1212 | if output_path.stat().st_size == 0: 1213 | raise Exception("Downloaded file is empty") 1214 | 1215 | video_paths.append(output_path) 1216 | break 1217 | 1218 | except Exception as e: 1219 | if attempt < max_download_retries - 1: 1220 | print_error(f"Download attempt {attempt + 1} failed: {e}, retrying...") 1221 | time.sleep(2) 1222 | else: 1223 | raise 1224 | 1225 | except Exception as e: 1226 | print_error(f"Error generating video segment {i+1}: {e}") 1227 | return None 1228 | 1229 | return video_paths 1230 | 1231 | def combine_video_segments(video_paths, target_duration, output_path): 1232 | """Combine video segments and adjust to match target duration""" 1233 | try: 1234 | print_step(EMOJI_VIDEO, "Combining video segments...") 1235 | 1236 | # Create temporary file for concatenation 1237 | with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: 1238 | # Write input files list with absolute paths 1239 | for video_path in video_paths: 1240 | # Convert to absolute path 1241 | abs_path = Path(video_path).resolve() 1242 | if not abs_path.exists(): 1243 | raise FileNotFoundError(f"Video file not found: {abs_path}") 1244 | f.write(f"file '{abs_path}'\n") 1245 | temp_list = f.name 1246 | 1247 | try: 1248 | # Create output directory if it doesn't exist 1249 | output_path = Path(output_path) 1250 | output_path.parent.mkdir(parents=True, exist_ok=True) 1251 | 1252 | # Concatenate videos 1253 | temp_concat = output_path.parent / 'temp_concat.mp4' 1254 | subprocess.run([ 1255 | 'ffmpeg', '-y', '-f', 'concat', '-safe', '0', 1256 | '-i', temp_list, '-c', 'copy', str(temp_concat) 1257 | ], check=True, capture_output=True) 1258 | 1259 | # Get concatenated video duration 1260 | probe = subprocess.run([ 1261 | 'ffprobe', '-v', 'error', '-show_entries', 'format=duration', 1262 | '-of', 'default=noprint_wrappers=1:nokey=1', str(temp_concat) 1263 | ], capture_output=True, text=True) 1264 | current_duration = float(probe.stdout.strip()) 1265 | 1266 | # Calculate speed factor to stretch video to match target duration 1267 | # If current_duration is 30s and target is 60s, we want speed_factor = 2 1268 | # to make the video twice as slow 1269 | speed_factor = target_duration / current_duration 1270 | 1271 | subprocess.run([ 1272 | 'ffmpeg', '-y', '-i', str(temp_concat), 1273 | '-filter:v', f'setpts={speed_factor}*PTS', 1274 | '-an', str(output_path) 1275 | ], check=True, capture_output=True) 1276 | 1277 | return True 1278 | 1279 | finally: 1280 | # Clean up temporary files 1281 | os.unlink(temp_list) 1282 | if temp_concat.exists(): 1283 | os.unlink(temp_concat) 1284 | 1285 | except subprocess.CalledProcessError as e: 1286 | print_error(f"FFmpeg error: {e.stderr.decode()}") 1287 | return False 1288 | except Exception as e: 1289 | print_error(f"Error combining videos: {e}") 1290 | return False 1291 | 1292 | def 
get_audio_duration(audio_path): 1293 | """Get duration of audio file in seconds""" 1294 | try: 1295 | probe = subprocess.run([ 1296 | 'ffprobe', '-v', 'error', '-show_entries', 'format=duration', 1297 | '-of', 'default=noprint_wrappers=1:nokey=1', audio_path 1298 | ], capture_output=True, text=True) 1299 | return float(probe.stdout.strip()) 1300 | except: 1301 | return None 1302 | 1303 | def combine_audio_video(video_path, audio_path, output_path): 1304 | """Combine video with audio track using ffmpeg and add fade out""" 1305 | try: 1306 | print_step(EMOJI_VIDEO, "Combining video and audio...") 1307 | 1308 | # Get video duration 1309 | probe = ffmpeg.probe(video_path) 1310 | duration = float(probe['streams'][0]['duration']) 1311 | fade_start = duration - 1 # Start fade 1 second before end 1312 | 1313 | # Create filter complex for fade out 1314 | # Apply fade filter directly to video stream 1315 | stream = ( 1316 | ffmpeg 1317 | .input(video_path) 1318 | .filter('fade', type='out', start_time=fade_start, duration=1) 1319 | .output( 1320 | ffmpeg.input(audio_path), 1321 | str(output_path), 1322 | acodec='aac', 1323 | strict='experimental', 1324 | **{ 1325 | 'filter_complex_threads': 1, 1326 | 'max_muxing_queue_size': 1024 1327 | } 1328 | ) 1329 | ) 1330 | 1331 | # Run ffmpeg with overwrite and error handling 1332 | try: 1333 | ffmpeg.run( 1334 | stream, 1335 | overwrite_output=True, 1336 | capture_stdout=True, 1337 | capture_stderr=True 1338 | ) 1339 | return True 1340 | 1341 | except ffmpeg.Error as e: 1342 | if e.stderr: 1343 | print_error(f"FFmpeg error: {e.stderr.decode()}") 1344 | if e.stdout: 1345 | print_error(f"FFmpeg output: {e.stdout.decode()}") 1346 | return False 1347 | 1348 | except Exception as e: 1349 | print_error(f"Error combining audio and video: {e}") 1350 | return False 1351 | 1352 | def generate_video_segments_with_runway(prompts, output_dir, base_images=None, timeout=900, podcast_script=None): 1353 | """Generate video segments using RunwayML with optional base images""" 1354 | if not runway_client: 1355 | print_error("RUNWAYML_API_SECRET environment variable not set") 1356 | return None 1357 | 1358 | video_paths = [] 1359 | for i, prompt in enumerate(prompts): 1360 | try: 1361 | print_step(EMOJI_VIDEO, f"Generating video segment {i+1}/{len(prompts)}...") 1362 | 1363 | # Try generation with retries and prompt regeneration 1364 | max_retries = 3 1365 | max_prompt_retries = 3 1366 | 1367 | for prompt_attempt in range(max_prompt_retries): 1368 | try: 1369 | # Use base image if available, otherwise create gradient 1370 | if base_images and i < len(base_images): 1371 | with open(base_images[i], 'rb') as f: 1372 | image_bytes = f.read() 1373 | image_b64 = base64.b64encode(image_bytes).decode('utf-8') 1374 | image_uri = f"data:image/jpeg;base64,{image_b64}" 1375 | else: 1376 | temp_image = output_dir / f"input_{i:02d}.png" 1377 | gradient = create_gradient_image() 1378 | gradient.save(temp_image) 1379 | with open(temp_image, 'rb') as f: 1380 | image_bytes = f.read() 1381 | image_b64 = base64.b64encode(image_bytes).decode('utf-8') 1382 | image_uri = f"data:image/png;base64,{image_b64}" 1383 | temp_image.unlink() 1384 | 1385 | # Create task with current prompt 1386 | task = runway_client.image_to_video.create( 1387 | model='gen3a_turbo', 1388 | prompt_text=prompt, 1389 | prompt_image=image_uri, 1390 | duration=10, 1391 | ratio="1280:768" 1392 | ) 1393 | 1394 | # Poll for completion with timeout 1395 | start_time = time.time() 1396 | max_retries = 180 1397 | retries = 0 1398 | 
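# Tracks whether RunwayML rejected this prompt on moderation grounds, so the
# outer prompt-retry loop can regenerate the prompt instead of failing outright.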
moderation_failed = False 1399 | 1400 | while retries < max_retries: 1401 | if time.time() - start_time > timeout: 1402 | print_error(f"Timeout after {timeout} seconds") 1403 | try: 1404 | runway_client.tasks.cancel(id=task.id) 1405 | except: 1406 | pass 1407 | return None 1408 | 1409 | try: 1410 | task_status = runway_client.tasks.retrieve(id=task.id) 1411 | except Exception as e: 1412 | print_error(f"Error retrieving task status: {e}") 1413 | time.sleep(5) 1414 | retries += 1 1415 | continue 1416 | 1417 | if task_status.status == "SUCCEEDED": 1418 | if not hasattr(task_status, 'output') or not task_status.output: 1419 | print_error("No output in completed task") 1420 | return None 1421 | 1422 | video_urls = task_status.output 1423 | if not video_urls or not isinstance(video_urls, list): 1424 | print_error("Invalid output format in task") 1425 | return None 1426 | 1427 | video_url = video_urls[0] 1428 | break 1429 | 1430 | elif task_status.status == "FAILED": 1431 | error_msg = getattr(task_status, 'failure', '') or getattr(task_status, 'failureCode', 'Unknown error') 1432 | if "moderation" in error_msg.lower(): 1433 | moderation_failed = True 1434 | break 1435 | # Add regeneration for any failure 1436 | if prompt_attempt < max_prompt_retries - 1: 1437 | print_error(f"Generation failed: {error_msg}, regenerating prompt...") 1438 | new_prompts = generate_video_segments(podcast_script, num_segments=1) 1439 | if new_prompts and len(new_prompts) > 0: 1440 | prompt = new_prompts[0] # Update prompt for next attempt 1441 | break 1442 | print_error(f"Video generation failed: {error_msg}") 1443 | return None 1444 | 1445 | elif task_status.status == "CANCELLED": 1446 | print_error("Video generation was cancelled") 1447 | return None 1448 | 1449 | elif task_status.status == "THROTTLED": 1450 | print_step(EMOJI_VIDEO, f"Generation queued (throttled)... Attempt {retries+1}/{max_retries}", color=Fore.YELLOW) 1451 | 1452 | elif task_status.status == "PENDING": 1453 | print_step(EMOJI_VIDEO, f"Generation pending... Attempt {retries+1}/{max_retries}", color=Fore.YELLOW) 1454 | 1455 | elif task_status.status == "RUNNING": 1456 | progress = float(getattr(task_status, 'progress', 0) or 0) * 100 1457 | elapsed = int(time.time() - start_time) 1458 | print_step(EMOJI_VIDEO, 1459 | f"Generating segment {i+1}... 
({progress:.0f}%) - {elapsed}s elapsed", 1460 | color=Fore.YELLOW) 1461 | 1462 | time.sleep(5) 1463 | retries += 1 1464 | continue 1465 | 1466 | if moderation_failed: 1467 | if prompt_attempt < max_prompt_retries - 1: 1468 | print_error("Moderation failed, regenerating prompt...") 1469 | # Regenerate prompt for this segment 1470 | new_prompts = generate_video_segments(podcast_script, num_segments=1) 1471 | if new_prompts and len(new_prompts) > 0: 1472 | prompt = new_prompts[0] # Update prompt for next attempt 1473 | break 1474 | raise Exception("Failed to generate acceptable prompt after retries") 1475 | 1476 | # Download video with retries 1477 | max_download_retries = 3 1478 | for download_attempt in range(max_download_retries): 1479 | try: 1480 | output_path = output_dir / f"segment_{i:02d}.mp4" 1481 | response = requests.get(video_url, stream=True, timeout=30) 1482 | response.raise_for_status() 1483 | 1484 | with open(output_path, 'wb') as file: 1485 | for chunk in response.iter_content(chunk_size=8192): 1486 | file.write(chunk) 1487 | 1488 | if output_path.stat().st_size == 0: 1489 | raise Exception("Downloaded file is empty") 1490 | 1491 | video_paths.append(output_path) 1492 | break 1493 | 1494 | except Exception as e: 1495 | if download_attempt < max_download_retries - 1: 1496 | print_error(f"Download attempt {download_attempt + 1} failed: {e}, retrying...") 1497 | time.sleep(2) 1498 | else: 1499 | raise 1500 | 1501 | # If we get here, generation and download were successful 1502 | break 1503 | 1504 | except Exception as e: 1505 | if prompt_attempt < max_prompt_retries - 1: 1506 | print_error(f"Prompt attempt {prompt_attempt + 1} failed: {e}, trying new prompt...") 1507 | continue 1508 | raise 1509 | 1510 | except Exception as e: 1511 | print_error(f"Error generating video segment {i+1}: {e}") 1512 | return None 1513 | 1514 | return video_paths 1515 | 1516 | def calculate_num_segments(audio_duration, provider="luma"): 1517 | """Calculate optimal number of video segments based on audio duration and provider""" 1518 | # Provider-specific segment durations 1519 | SEGMENT_DURATIONS = { 1520 | "luma": 5, # LumaAI generates 5s videos 1521 | "runway": 10 # RunwayML generates 10s videos 1522 | } 1523 | 1524 | # Provider-specific maximum segments 1525 | MAX_SEGMENTS = { 1526 | "luma": 10, # Allow more segments for LumaAI due to shorter duration 1527 | "runway": 5 # Keep RunwayML at 5 segments max 1528 | } 1529 | 1530 | segment_duration = SEGMENT_DURATIONS.get(provider, 5) # Default to 5s if provider unknown 1531 | max_segments = MAX_SEGMENTS.get(provider, 5) # Default to 5 if provider unknown 1532 | 1533 | # Calculate ideal number of segments to cover the audio 1534 | ideal_segments = math.ceil(audio_duration / segment_duration) 1535 | 1536 | # Keep segments between 2 and max_segments 1537 | if audio_duration <= segment_duration: 1538 | # Very short audio - single segment 1539 | return 1 1540 | elif audio_duration <= 2 * segment_duration: 1541 | # Short audio - two segments 1542 | return 2 1543 | elif audio_duration <= max_segments * segment_duration: 1544 | # Medium audio - scale segments based on duration 1545 | return min(max_segments, max(2, ideal_segments)) 1546 | else: 1547 | # Long audio - cap at max_segments 1548 | return max_segments 1549 | 1550 | def calculate_target_length(duration_seconds): 1551 | """Calculate target word counts based on content duration""" 1552 | # Base lengths for a 10-minute video 1553 | BASE_DURATION = 600 # 10 minutes in seconds 1554 | BASE_SUMMARY_WORDS = 
300 1555 | BASE_PODCAST_WORDS = 600 1556 | 1557 | # Calculate scaling factor (with min/max limits) 1558 | scale = min(max(duration_seconds / BASE_DURATION, 0.3), 2.0) 1559 | 1560 | return { 1561 | 'summary': int(BASE_SUMMARY_WORDS * scale), 1562 | 'podcast': int(BASE_PODCAST_WORDS * scale) 1563 | } 1564 | 1565 | def generate_image_prompts(video_prompts): 1566 | """Generate relevant, concrete image prompts that match podcast content""" 1567 | 1568 | @ell.simple(model="claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=2048) 1569 | def get_image_prompts(prompts: list, summary: str, podcast: str) -> str: 1570 | return f"""Create {len(prompts)} detailed image prompts for Stable Diffusion that illustrate the key topics being discussed. 1571 | Each prompt should create a clear, realistic visualization of the concepts, using concrete imagery. 1572 | 1573 | Content Summary: 1574 | {summary} 1575 | 1576 | Podcast Script: 1577 | {podcast} 1578 | 1579 | Required Elements for Each Prompt: 1580 | 1. Base Quality: 1581 | - Start with: "masterpiece, highly detailed, 8k uhd, photorealistic" 1582 | - End with: "professional lighting, cinematic composition" 1583 | 1584 | 2. Scene Components: 1585 | - Main Subject: Primary topic or concept being discussed 1586 | - Environment: Relevant setting or location 1587 | - Supporting Elements: Objects, tools, or items that relate to the topic 1588 | - Human Element: People, hands, or human presence when relevant 1589 | - Scale: Show size and scope of the subject matter 1590 | 1591 | 3. Visual Guidelines: 1592 | - Create documentary-style scenes 1593 | - Show real objects and environments 1594 | - Include relevant details from the discussion 1595 | - Use appropriate lighting for the setting 1596 | - Choose engaging camera angles 1597 | - Keep scenes grounded and realistic 1598 | 1599 | 4. Scene Types: 1600 | - Process/Action: Show something being done or created 1601 | - Location/Setting: Establish where something happens 1602 | - Object/Detail: Focus on specific items being discussed 1603 | - Interaction: Show how things or people work together 1604 | - Result/Impact: Visualize outcomes or effects 1605 | 1606 | Instructions: 1607 | 1. Read the current section of discussion 1608 | 2. Identify the main concept or point 1609 | 3. Choose the most appropriate scene type 1610 | 4. Include specific details mentioned in the content 1611 | 5. Make it concrete and photorealistic 1612 | 6. Ensure it matches the topic being discussed 1613 | 1614 | Example Structure: 1615 | "masterpiece, highly detailed, 8k uhd, photorealistic, [main subject in action/setting], [environment details], [supporting elements], [human presence if relevant], [lighting and atmosphere], professional lighting, cinematic composition" 1616 | 1617 | Return a JSON array of {len(prompts)} strings. 
1618 | No code blocks, only the JSON array.""" 1619 | 1620 | try: 1621 | # Read the summary and podcast files for context 1622 | summary_file = next(Path("out").glob("summary-*.txt")) 1623 | podcast_file = next(Path("out").glob("podcast-*.txt")) 1624 | summary = summary_file.read_text() 1625 | podcast = podcast_file.read_text() 1626 | 1627 | # Generate prompts with content context 1628 | prompts = json.loads(get_image_prompts(video_prompts, summary, podcast)) 1629 | 1630 | # Validate prompts 1631 | if not isinstance(prompts, list) or len(prompts) != len(video_prompts): 1632 | raise ValueError(f"Invalid prompt format - must be array of exactly {len(video_prompts)} strings") 1633 | 1634 | # Ensure all prompts are strings and have required elements 1635 | prompts = [str(p) for p in prompts] 1636 | 1637 | # Validate prompt structure 1638 | for i, prompt in enumerate(prompts): 1639 | if not isinstance(prompt, str): 1640 | raise ValueError(f"Prompt {i} must be a string") 1641 | if not prompt.startswith("masterpiece, highly detailed, 8k uhd, photorealistic"): 1642 | raise ValueError(f"Prompt {i} must start with the required quality elements") 1643 | if not prompt.endswith("professional lighting, cinematic composition"): 1644 | raise ValueError(f"Prompt {i} must end with the required composition elements") 1645 | 1646 | return prompts 1647 | 1648 | except Exception as e: 1649 | print_error(f"Error generating image prompts: {e}") 1650 | return None 1651 | 1652 | def generate_flux_images(prompts, output_dir): 1653 | """Generate images using Flux Pro Ultra for each prompt""" 1654 | if not os.getenv("REPLICATE_API_TOKEN"): 1655 | print_error("REPLICATE_API_TOKEN environment variable not set") 1656 | return None 1657 | 1658 | image_paths = [] # Store local paths 1659 | for i, prompt in enumerate(prompts): 1660 | try: 1661 | print_step(EMOJI_VIDEO, f"Generating base image {i+1}/{len(prompts)}...") 1662 | 1663 | # Get image URL from Replicate 1664 | output_url = replicate.run( 1665 | "black-forest-labs/flux-1.1-pro-ultra", 1666 | input={ 1667 | "raw": False, 1668 | "prompt": prompt, 1669 | "aspect_ratio": "16:9", 1670 | "output_format": "jpg", 1671 | "safety_tolerance": 2, 1672 | "image_prompt_strength": 0.1 1673 | } 1674 | ) 1675 | 1676 | # Download and save image 1677 | output_path = output_dir / f"base_{i:02d}.jpg" 1678 | response = requests.get(output_url, stream=True) 1679 | with open(output_path, 'wb') as file: 1680 | file.write(response.content) 1681 | 1682 | image_paths.append(output_path) 1683 | 1684 | except Exception as e: 1685 | print_error(f"Error generating base image {i+1}: {e}") 1686 | return None 1687 | 1688 | return image_paths 1689 | 1690 | def create_gradient_image(width=1280, height=768): 1691 | """Create a simple gradient image for video generation""" 1692 | image = Image.new('RGB', (width, height)) 1693 | draw = ImageDraw.Draw(image) 1694 | 1695 | # Create a vertical gradient from dark to light blue 1696 | for y in range(height): 1697 | # Calculate color components 1698 | r = int(20 * y / height) # Dark to slightly red 1699 | g = int(50 * y / height) # Dark to medium green 1700 | b = int(255 * y / height) # Dark to bright blue 1701 | 1702 | # Draw horizontal line with current color 1703 | draw.line([(0, y), (width, y)], fill=(r, g, b)) 1704 | 1705 | return image 1706 | 1707 | def main(): 1708 | # Set up argument parser 1709 | parser = argparse.ArgumentParser(description='Summarize YouTube videos') 1710 | parser.add_argument('url', help='YouTube video URL or video ID') 1711 | 
parser.add_argument('--language', default='english', 1712 | help='Output language for the summary (default: english)') 1713 | parser.add_argument('--podcast', action='store_true', 1714 | help='Generate podcast version with audio') 1715 | parser.add_argument('--ignore-subs', action='store_true', 1716 | help='Ignore YouTube subtitles and force transcription') 1717 | 1718 | # Add transcription method group 1719 | trans_group = parser.add_mutually_exclusive_group() 1720 | trans_group.add_argument('--fast-whisper', action='store_true', 1721 | help='Use Fast Whisper for transcription (faster)') 1722 | trans_group.add_argument('--whisper', action='store_true', 1723 | help='Use OpenAI Whisper for transcription (slower but may be more accurate)') 1724 | trans_group.add_argument('--replicate', action='store_true', 1725 | help='Use Replicate Incredibly Fast Whisper (fastest, requires API key)') 1726 | 1727 | # Video generation group 1728 | video_group = parser.add_mutually_exclusive_group() 1729 | video_group.add_argument('--lumaai', action='store_true', 1730 | help='Generate video using Luma AI (requires --podcast)') 1731 | video_group.add_argument('--runwayml', action='store_true', 1732 | help='Generate video using RunwayML (requires --podcast)') 1733 | 1734 | args = parser.parse_args() 1735 | 1736 | try: 1737 | # Clean and validate URL 1738 | clean_url = clean_youtube_url(args.url) 1739 | 1740 | # Get video ID for filenames 1741 | try: 1742 | video_id = clean_url.split('v=')[1].split('&')[0] 1743 | except Exception: 1744 | print_error("Could not extract video ID from URL") 1745 | sys.exit(1) 1746 | 1747 | # Check for existing files 1748 | summary_file = OUTPUT_DIR / f"summary-{video_id}.txt" 1749 | podcast_script_file = OUTPUT_DIR / f"podcast-{video_id}.txt" 1750 | podcast_audio_file = OUTPUT_DIR / f"podcast-{video_id}.mp3" 1751 | final_video_file = OUTPUT_DIR / f"video-{video_id}.mp4" 1752 | 1753 | # Get video metadata first (always do this to verify video exists) 1754 | try: 1755 | metadata = get_video_metadata(clean_url) 1756 | # Get video duration from metadata 1757 | duration = None 1758 | if metadata: 1759 | try: 1760 | duration = float(re.search(r'Duration: (\d+\.\d+)', metadata).group(1)) 1761 | except Exception: 1762 | pass 1763 | except Exception as e: 1764 | print_error(f"Error processing metadata: {e}") 1765 | metadata = "" # Continue without metadata 1766 | duration = None 1767 | 1768 | # Check if we need to generate summary 1769 | if summary_file.exists(): 1770 | print_step(EMOJI_SUCCESS, f"Summary already exists at {summary_file}") 1771 | summary = summary_file.read_text() 1772 | else: 1773 | with tempfile.TemporaryDirectory() as temp_dir: 1774 | temp_dir = Path(temp_dir) 1775 | audio_path = temp_dir / "audio.m4a" 1776 | base_path = temp_dir / "audio" 1777 | 1778 | # Try YouTube subtitles first (unless --ignore-subs is used) 1779 | transcript = None 1780 | if not args.ignore_subs: 1781 | subtitle_language = args.language.lower() if args.language else "en" 1782 | subtitle_file = get_youtube_subtitles(clean_url, str(base_path), subtitle_language) 1783 | 1784 | if subtitle_file: 1785 | # Read the subtitle file 1786 | with open(subtitle_file, 'r', encoding='utf-8') as f: 1787 | transcript = f.read() 1788 | # Clean up the downloaded subtitle file 1789 | os.remove(subtitle_file) 1790 | 1791 | # If no transcript yet (no subs or --ignore-subs), transcribe audio 1792 | if not transcript: 1793 | method = ('Fast Whisper' if args.fast_whisper 1794 | else 'OpenAI Whisper' if args.whisper 1795 | else 'Incredibly
Fast Whisper' if args.replicate 1796 | else 'Fast Whisper') # Default 1797 | print_step(EMOJI_TRANSCRIBE, f"Using {method} for transcription...") 1798 | 1799 | if not download_video(clean_url, str(audio_path)): 1800 | sys.exit(1) 1801 | 1802 | if not transcribe_video(str(audio_path), 1803 | use_fast_whisper=args.fast_whisper or (not args.whisper and not args.replicate), 1804 | use_replicate=args.replicate, 1805 | language=args.language): 1806 | sys.exit(1) 1807 | 1808 | transcript = (temp_dir / "audio.txt").read_text() 1809 | 1810 | # Convert to shorthand 1811 | shorthand = to_shorthand(transcript) 1812 | 1813 | # Generate summary with appropriate length 1814 | summary = summarize_with_claude(shorthand, metadata, args.language) 1815 | if not summary: 1816 | sys.exit(1) 1817 | 1818 | # Save summary 1819 | Path(summary_file).write_text(metadata + summary) 1820 | print_success(f"Summary saved to {summary_file}") 1821 | 1822 | # If podcast option is enabled 1823 | if args.podcast: 1824 | # Check if podcast files exist 1825 | if podcast_script_file.exists() and podcast_audio_file.exists(): 1826 | print_step(EMOJI_SUCCESS, f"Podcast script already exists at {podcast_script_file}") 1827 | print_step(EMOJI_SUCCESS, f"Podcast audio already exists at {podcast_audio_file}") 1828 | podcast_script = podcast_script_file.read_text() 1829 | else: 1830 | # Convert to podcast script and generate audio 1831 | podcast_script = convert_to_podcast_script(summary, args.language, duration) 1832 | if not podcast_script: 1833 | sys.exit(1) 1834 | 1835 | # Save podcast script 1836 | podcast_script_file.write_text(podcast_script) 1837 | 1838 | # Generate audio file 1839 | if not generate_podcast_audio(podcast_script, podcast_audio_file): 1840 | sys.exit(1) 1841 | 1842 | print_success(f"Podcast script saved to {podcast_script_file}") 1843 | print_success(f"Podcast audio saved to {podcast_audio_file}") 1844 | 1845 | # If video generation is enabled 1846 | if args.lumaai or args.runwayml: 1847 | # Check if final video exists 1848 | if final_video_file.exists(): 1849 | print_step(EMOJI_SUCCESS, f"Final video already exists at {final_video_file}") 1850 | return 1851 | 1852 | # Create temporary directory for video segments 1853 | video_temp_dir = OUTPUT_DIR / "temp_videos" 1854 | video_temp_dir.mkdir(exist_ok=True) 1855 | 1856 | temp_video = None 1857 | try: 1858 | # Get podcast audio duration 1859 | audio_duration = get_audio_duration(podcast_audio_file) 1860 | if not audio_duration: 1861 | print_error("Could not determine podcast duration") 1862 | sys.exit(1) 1863 | 1864 | # Calculate number of segments needed 1865 | num_segments = calculate_num_segments( 1866 | audio_duration, 1867 | provider="luma" if args.lumaai else "runway" 1868 | ) 1869 | 1870 | # Generate video prompts 1871 | prompts = generate_video_segments(podcast_script, num_segments=num_segments) 1872 | if not prompts: 1873 | sys.exit(1) 1874 | 1875 | # Generate base images with Flux 1876 | image_prompts = generate_image_prompts(prompts) 1877 | if not image_prompts: 1878 | sys.exit(1) 1879 | 1880 | base_images = generate_flux_images(image_prompts, video_temp_dir) 1881 | if not base_images: 1882 | sys.exit(1) 1883 | 1884 | # Generate video segments with selected provider 1885 | if args.lumaai: 1886 | video_paths = generate_video_segments_with_luma( 1887 | prompts, 1888 | video_temp_dir, 1889 | base_images, 1890 | podcast_script=podcast_script 1891 | ) 1892 | else: # args.runwayml 1893 | video_paths = generate_video_segments_with_runway( 1894 | prompts, 1895 | 
video_temp_dir, 1896 | base_images, 1897 | podcast_script=podcast_script 1898 | ) 1899 | 1900 | if not video_paths: 1901 | sys.exit(1) 1902 | 1903 | # Combine videos and match audio duration 1904 | temp_video = OUTPUT_DIR / f"temp-video-{video_id}.mp4" 1905 | 1906 | if not combine_video_segments(video_paths, audio_duration, temp_video): 1907 | sys.exit(1) 1908 | 1909 | # Combine with podcast audio 1910 | if not combine_audio_video(temp_video, podcast_audio_file, final_video_file): 1911 | sys.exit(1) 1912 | 1913 | print_success(f"Final video saved to {final_video_file}") 1914 | 1915 | finally: 1916 | # Clean up temporary files 1917 | if video_temp_dir.exists(): 1918 | shutil.rmtree(video_temp_dir) 1919 | if temp_video and temp_video.exists(): 1920 | os.remove(temp_video) 1921 | 1922 | except KeyboardInterrupt: 1923 | print_error("\nOperation cancelled by user") 1924 | sys.exit(1) 1925 | 1926 | if __name__ == "__main__": 1927 | main() 1928 | --------------------------------------------------------------------------------
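For a quick sense of how the duration-scaling helpers in ytsum.py behave (calculate_num_segments and calculate_target_length), the following is a minimal sketch, not part of the repository: it assumes ytsum.py is importable from the working directory (e.g. run from the repository root with the requirements installed), and the CLI invocation in the comment uses a placeholder URL/ID for illustration only.

# Illustrative sketch -- not part of the repository.
# Typical CLI use (placeholder URL or video ID):
#   python ytsum.py <video_url_or_id> --podcast --lumaai
from ytsum import calculate_num_segments, calculate_target_length

for seconds in (30, 90, 600, 1800):
    luma_segments = calculate_num_segments(seconds, provider="luma")      # 5s clips, capped at 10 segments
    runway_segments = calculate_num_segments(seconds, provider="runway")  # 10s clips, capped at 5 segments
    words = calculate_target_length(seconds)                              # scale clamped to 0.3-2.0 of the 10-minute baseline
    print(f"{seconds}s -> luma: {luma_segments} segments, runway: {runway_segments} segments, "
          f"summary ~{words['summary']} words, podcast ~{words['podcast']} words")

For example, per the code above, a 90-second podcast already hits the 10-segment LumaAI cap (ceil(90 / 5) = 18, capped), while the word targets scale linearly between 0.3x and 2.0x of the 300-word summary / 600-word podcast baseline.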