├── .gitignore ├── .env.example ├── requirements.txt ├── requirements-core.txt ├── install.sh ├── format_video.py ├── readme.md └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.mp4 3 | *.mp3 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Copy this file to .env and fill in your API keys 2 | 3 | # OpenAI API Key 4 | OPEN_API_KEY="" 5 | 6 | # Fal AI API Key (for image generation with Flux Schnell) 7 | FAL_API_KEY="" 8 | 9 | # ElevenLabs API Key (for voice generation) 10 | ELVEN_LABS_API_KEY="" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # YouTube Shorts Generator - Core Dependencies 2 | # Tested and working versions as of September 2025 3 | 4 | # Core libraries 5 | requests==2.32.5 6 | python-dotenv==1.1.1 7 | 8 | # OpenAI for script generation 9 | openai==1.102.0 10 | 11 | # MoviePy for video processing (specific version for PIL compatibility) 12 | moviepy==1.0.3 13 | decorator==4.4.2 # Required by moviepy 1.0.3 14 | 15 | # Video/Image processing dependencies 16 | imageio==2.37.0 17 | imageio-ffmpeg==0.6.0 18 | numpy==2.3.2 19 | pillow==11.3.0 # Latest version with our compatibility fix 20 | 21 | # Progress bar for video processing 22 | tqdm==4.67.1 23 | proglog==0.1.12 24 | 25 | # Additional moviepy dependencies 26 | scipy==1.16.0 # Required for some moviepy operations 27 | 28 | # HTTP client libraries (for API calls) 29 | certifi==2025.8.3 30 | charset-normalizer==3.4.3 31 | idna==3.10 32 | urllib3==2.5.0 33 | 34 | # OpenAI dependencies 35 | pydantic==2.11.7 36 | pydantic-core==2.33.2 37 | distro==1.9.0 38 | jiter==0.10.0 39 | anyio==4.9.0 40 | httpcore==1.0.9 41 | httpx==0.28.1 42 | sniffio==1.3.1 43 
| typing-extensions==4.14.1 44 | annotated-types==0.7.0 45 | typing-inspection==0.4.1 46 | -------------------------------------------------------------------------------- /requirements-core.txt: -------------------------------------------------------------------------------- 1 | # YouTube Shorts Generator - Core Dependencies 2 | # Tested and working versions as of September 2025 3 | 4 | # Core libraries 5 | requests==2.32.5 6 | python-dotenv==1.1.1 7 | 8 | # OpenAI for script generation 9 | openai==1.102.0 10 | 11 | # MoviePy for video processing (specific version for PIL compatibility) 12 | moviepy==1.0.3 13 | decorator==4.4.2 # Required by moviepy 1.0.3 14 | 15 | # Video/Image processing dependencies 16 | imageio==2.37.0 17 | imageio-ffmpeg==0.6.0 18 | numpy==2.3.2 19 | pillow==11.3.0 # Latest version with our compatibility fix 20 | 21 | # Progress bar for video processing 22 | tqdm==4.67.1 23 | proglog==0.1.12 24 | 25 | # Additional moviepy dependencies 26 | scipy==1.16.0 # Required for some moviepy operations 27 | 28 | # HTTP client libraries (for API calls) 29 | certifi==2025.8.3 30 | charset-normalizer==3.4.3 31 | idna==3.10 32 | urllib3==2.5.0 33 | 34 | # OpenAI dependencies 35 | pydantic==2.11.7 36 | pydantic-core==2.33.2 37 | distro==1.9.0 38 | jiter==0.10.0 39 | anyio==4.9.0 40 | httpcore==1.0.9 41 | httpx==0.28.1 42 | sniffio==1.3.1 43 | typing-extensions==4.14.1 44 | annotated-types==0.7.0 45 | typing-inspection==0.4.1 46 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # YouTube Shorts Generator - Installation Script 4 | # This script sets up the environment with locked dependency versions 5 | 6 | echo "🎬 YouTube Shorts Generator - Setup Script" 7 | echo "==========================================" 8 | 9 | # Check if Python is installed 10 | if ! 
#!/bin/bash

# YouTube Shorts Generator - Installation Script
# Sets up the environment with locked dependency versions.

echo "🎬 YouTube Shorts Generator - Setup Script"
echo "=========================================="

# Abort with the supplied message unless the named command is on PATH.
require_tool() {
    if ! command -v "$1" &> /dev/null; then
        echo "$2"
        exit 1
    fi
}

require_tool python "❌ Python is not installed. Please install Python 3.8+ first."
echo "✅ Python found: $(python --version)"

require_tool pip "❌ pip is not installed. Please install pip first."
echo "✅ pip found: $(pip --version)"

echo ""
echo "📦 Installing locked dependencies..."
echo "This may take a few minutes..."

if pip install -r requirements.txt; then
    echo ""
    echo "✅ All dependencies installed successfully!"
    echo ""
    echo "🔧 Next steps:"
    echo "1. Copy .env.example to .env"
    echo "2. Fill in your API keys in the .env file:"
    echo "   - OPEN_API_KEY (OpenAI)"
    echo "   - FAL_API_KEY (Fal AI)"
    echo "   - ELVEN_LABS_API_KEY (ElevenLabs)"
    echo "3. Run: python main.py \"Your topic here\""
    echo ""
    echo "📖 See readme.md for more detailed instructions."
else
    echo ""
    echo "❌ Installation failed. Please check the error messages above."
    exit 1
fi
def reformat_video(video_filename):
    """Center-crop and resize a video to 1080x1920 (vertical Shorts format).

    Audio is stripped; the result is written next to the input as
    ``vertical_<video_filename>``.

    Args:
        video_filename: path to the source video.

    Returns:
        The new filename on success, or None if the file is missing or
        processing fails.
    """
    if not os.path.exists(video_filename):
        print(f"File not found: {video_filename}")
        return None

    clip = None
    resized_clip = None
    try:
        clip = VideoFileClip(video_filename)
        clip = clip.without_audio()  # Remove audio

        target_width, target_height = 1080, 1920  # Vertical video resolution
        original_width, original_height = clip.size
        aspect_ratio = target_width / target_height

        # Keep the 9:16 aspect ratio by cropping the longer dimension.
        if original_width / original_height > aspect_ratio:
            # Source is too wide: crop the sides.
            new_height = original_height
            new_width = int(new_height * aspect_ratio)
            x1 = (original_width - new_width) // 2
            y1 = 0
        else:
            # Source is too tall: crop top and bottom.
            new_width = original_width
            new_height = int(new_width / aspect_ratio)
            x1 = 0
            y1 = (original_height - new_height) // 2

        x2 = x1 + new_width
        y2 = y1 + new_height

        resized_clip = clip.crop(x1=x1, y1=y1, x2=x2, y2=y2).resize((target_width, target_height))
        resized_filename = f"vertical_{video_filename}"
        resized_clip.write_videofile(resized_filename, codec="libx264", audio_codec="aac")
        print(f"Vertical resized video created: {resized_filename}")
        return resized_filename
    except Exception as e:
        print(f"Error processing file {video_filename}: {e}")
        return None
    finally:
        # Bug fix: the original closed only resized_clip, and nothing on the
        # exception path — release both ffmpeg readers unconditionally.
        for c in (resized_clip, clip):
            if c is not None:
                try:
                    c.close()
                except Exception:
                    pass
def merge_audio_video(audio_filename, video_filename):
    """Attach a voiceover track to a video, looping the video if needed.

    The output is trimmed to exactly the audio's duration and written as
    ``merged_<basename of video_filename>`` in the current directory.

    Args:
        audio_filename: path to the audio track.
        video_filename: path to the (silent) video; only its basename is used,
            so the file is expected to be in the current directory.

    Returns:
        The merged video's filename.
    """
    video_filename = os.path.basename(video_filename)
    video_clip = VideoFileClip(video_filename)
    audio_clip = AudioFileClip(audio_filename)
    try:
        # Loop the video clip if it's shorter than the audio clip.
        if video_clip.duration < audio_clip.duration:
            loops_required = math.ceil(audio_clip.duration / video_clip.duration)
            video_clip = concatenate_videoclips([video_clip] * loops_required)
        # Attach the narration, then trim the video to the audio's length.
        video_clip = video_clip.set_audio(audio_clip)
        video_clip = video_clip.subclip(0, audio_clip.duration)

        merged_filename = f"merged_{video_filename}"
        video_clip.write_videofile(merged_filename, codec="libx264", audio_codec="aac")
        return merged_filename
    finally:
        # Bug fix: the original leaked both readers; close them once done.
        audio_clip.close()
        video_clip.close()
**Install Dependencies:** Ensure Python 3.x is installed, then run: 30 | ```bash 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | *Note: We use locked versions to ensure compatibility. The `requirements.txt` includes specific versions that have been tested and confirmed working together.* 35 | 36 | 2. **Environment Variables:** Set up `.env` file with API keys: 37 | - `OPEN_API_KEY` for OpenAI. 38 | - `FAL_API_KEY` for Fal AI. 39 | - `ELVEN_LABS_API_KEY` for ElevenLabs. 40 | 41 | ## Usage 42 | Run the script with a topic as an argument: `python main.py "Your Topic"`. 43 | 44 | ### Script Workflow: 45 | 1. **Topic Input:** Accepts a topic for the YouTube Short. 46 | 2. **Scenario Generation:** Uses GPT-4o-2024-05-13 to generate a video script based on the topic. 47 | 3. **Image Generation:** Creates relevant images using Fal AI with Flux Schnell model. 48 | 4. **Audio Generation:** Creates a voiceover using ElevenLabs API. 49 | 5. **Video Assembly:** Merges audio and generated images into video clips, and outputs the final video. 50 | 51 | ## Functions Overview 52 | - `generate_audio_from_text(text, scene_id, voice_id)`: Generates a voiceover for a given text. 53 | - `generate_images_from_fal(keywords, scene_id)`: Generates images using Fal AI with Flux Schnell model based on provided keywords. 54 | - `create_video_from_images(images, scene_id, duration)`: Creates a video from generated images. 55 | - `generate_scenario(topic)`: Generates the script and scene descriptions using GPT-4o-2024-05-13. 56 | 57 | ## Limitations & Notes 58 | - **API Quotas:** Be aware of rate limits and quotas for OpenAI, Fal AI, and ElevenLabs APIs. 59 | - **Image Quality:** The quality of the final video depends on the generated images and TTS quality. 60 | - **Error Handling:** The script includes basic error handling, which can be expanded for robustness. 
61 | 62 | ## Future Enhancements 63 | - **Customization Options:** Adding more options for voice and style customization in voiceovers. 64 | - **Improved Error Handling:** Enhance error detection and handling for more stability. 65 | - **User Interface:** Develop a GUI for easier interaction and topic input. 66 | - **Image Style Control:** Add options to control the style and theme of generated images. 67 | 68 | ## Recent Updates 69 | - **✅ Updated to GPT-4o-2024-05-13** for improved script generation 70 | - **✅ Replaced Pexels with Fal AI + Flux Schnell** for high-quality image generation 71 | - **✅ Added automatic video creation** from generated images 72 | - **✅ Fixed compatibility issues** between MoviePy and newer Pillow versions 73 | - **✅ Locked dependency versions** in `requirements.txt` to prevent compatibility issues 74 | 75 | ## Important Notes 76 | - **Dependency Versions:** This project uses specific library versions (see `requirements.txt`) that have been tested for compatibility. Using different versions may cause issues, particularly with MoviePy and Pillow. 77 | - **PIL Compatibility:** The code includes a compatibility fix for newer Pillow versions with older MoviePy. 78 | - **System Dependencies:** Make sure you have the required system libraries for image processing (automatically installed in most environments). 79 | 80 | ## Contributing 81 | Contributions to improve or extend the script are welcome. Please adhere to standard coding practices and provide documentation for any changes. 
def generate_audio_from_text(text, scene_id, voice_id="21m00Tcm4TlvDq8ikWAM"):
    """Generate an MP3 voiceover for one scene via the ElevenLabs TTS API.

    Args:
        text: the narration to synthesize.
        scene_id: identifies the scene; output is saved as scene_<id>.mp3.
        voice_id: ElevenLabs voice to use (default is the "Bella" voice).

    Returns:
        The output filename on success, None on failure (error is printed).
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    payload = {
        "model_id": "eleven_multilingual_v2",
        # NOTE(review): the endpoint selects the voice via voice_id in the
        # URL; this extra "voice" field looks redundant — confirm against the
        # ElevenLabs API docs before removing it.
        "voice": "Bella",
        "text": text,
        "voice_settings": {
            "similarity_boost": 0.5,
            "stability": 0.5,
        }
    }
    headers = {
        "xi-api-key": f"{elevenlabs_api_key}",
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        # Save the returned audio bytes for this scene.
        filename = f"scene_{scene_id}.mp3"
        with open(filename, "wb") as f:
            f.write(response.content)
        return filename
    print(f"Failed to generate audio for scene {scene_id}: {response.text}")
    return None
def generate_images_from_fal(keywords, scene_id):
    """Generate one image per comma-separated keyword using the Fal API
    (Flux Schnell model) and download each result.

    Args:
        keywords: comma-separated keyword string (empty entries are skipped).
        scene_id: used in the output filenames scene_<id>_image_<i>.jpg.

    Returns:
        List of downloaded image filenames (possibly empty on failures).
    """
    keywords_list = keywords.split(',')
    generated_images = []

    for i, keyword in enumerate(keywords_list):
        keyword = keyword.strip()
        if not keyword:
            continue

        # Create a more detailed prompt for better image generation.
        prompt = f"High quality, cinematic, professional photograph of {keyword}, detailed, realistic, good lighting"

        url = "https://fal.run/fal-ai/flux/schnell"

        payload = {
            "prompt": prompt,
            "image_size": "portrait_16_9",
            "num_inference_steps": 4,
            "enable_safety_checker": True
        }

        headers = {
            "Authorization": f"Key {fal_api_key}",
            "Content-Type": "application/json"
        }

        try:
            # Timeouts keep one stuck API call from hanging the pipeline;
            # a timeout raises and is reported by the except below.
            response = requests.post(url, json=payload, headers=headers, timeout=120)

            if response.status_code == 200:
                result = response.json()
                if 'images' in result and len(result['images']) > 0:
                    image_url = result['images'][0]['url']

                    # Download the generated image.
                    image_response = requests.get(image_url, timeout=120)
                    if image_response.status_code == 200:
                        filename = f'scene_{scene_id}_image_{i}.jpg'
                        with open(filename, 'wb') as f:
                            f.write(image_response.content)
                        generated_images.append(filename)
                        # Bug fix: original printed a literal "(unknown)"
                        # placeholder instead of the saved filename.
                        print(f"Generated image for '{keyword}': {filename}")
                    else:
                        print(f"Failed to download image for '{keyword}'")
                else:
                    print(f"No images generated for '{keyword}'")
            else:
                print(f"Failed to generate image for '{keyword}': {response.text}")

        except Exception as e:
            print(f"Error generating image for '{keyword}': {str(e)}")

    return generated_images
def create_video_from_images(images, scene_id, duration=5):
    """Build a scene video by concatenating still images.

    Each image gets an equal share of ``duration`` seconds. The source image
    files are deleted once the video is written.

    Args:
        images: filenames of the stills (missing files are skipped).
        scene_id: used in the output filename scene_<id>_video.mp4.
        duration: total video length in seconds.

    Returns:
        The video filename on success, None if no usable images or on error.
    """
    if not images:
        print(f"No images available for scene {scene_id}")
        return None

    try:
        # Equal time per supplied image; missing files still count toward the
        # split (matches original behavior), so the total may come out short.
        clip_duration = duration / len(images)

        video_clips = [
            ImageClip(path, duration=clip_duration).resize((1080, 1920))  # Portrait for Shorts
            for path in images
            if os.path.exists(path)
        ]

        if not video_clips:
            print(f"No valid images found for scene {scene_id}")
            return None

        final_clip = concatenate_videoclips(video_clips, method="compose")
        video_filename = f'scene_{scene_id}_video.mp4'
        final_clip.write_videofile(video_filename, codec="libx264", fps=24)
        final_clip.close()
        # Release the per-image readers (original leaked them), then clean up
        # the stills now that they are baked into the video.
        for clip in video_clips:
            clip.close()
        for path in images:
            if os.path.exists(path):
                os.remove(path)

        print(f"Created video from images: {video_filename}")
        return video_filename

    except Exception as e:
        print(f"Error creating video from images for scene {scene_id}: {str(e)}")
        return None
def generate_scenario(topic):
    """Ask GPT-4o for a scene-by-scene Shorts script via function calling.

    Args:
        topic: the subject of the video.

    Returns:
        The function-call arguments as a JSON string of the form
        ``{"scenes": [{"id", "script", "keywords"}, ...]}`` — callers parse
        it with json.loads — or None on any failure.
    """
    client = OpenAI(
        # This is the default and can be omitted
        api_key=openai_api_key
    )

    # Function schema that forces the model to return structured scenes.
    tools = [
        {
            "name": "get_scenes",
            "description": "Get the scenes for a video voiceover script without scene descriptions",
            "parameters": {
                "type": "object",
                "properties": {
                    "scenes": {
                        "type": "array",
                        "description": "The scenes for the video voiceover script",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "integer"},
                                "script": {"type": "string", "description": "The script for the scene.MUST include only the text that will be spoken by the narrator"},
                                "keywords": {"type": "string", "description": "Relevant keywords for image generation using AI, describe visual elements that would complement the narration"},
                            },
                        },
                    },
                },
            }
        }
    ]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-2024-05-13",
            messages=[
                {"role": "system", "content": "You are youtube creator. You are creating a 1mn video script for a given topic"},
                {"role": "user", "content": topic},
            ],
            functions=tools,
            function_call={
                "name": "get_scenes",
            },
        )
        arguments = response.choices[0].message.function_call.arguments
        # Validate that the model returned well-formed JSON before handing it
        # back (the original parsed it and discarded the result; keep the
        # parse purely as validation so malformed output fails here).
        json.loads(arguments)
        return arguments
    except Exception as e:
        print(e)
        return None
# CLI entry point: python main.py "Your topic here"
if len(sys.argv) > 1:
    topic = sys.argv[1]
    response = generate_scenario(topic)
    if response:
        json_response = json.loads(response)

        for scene in json_response['scenes']:
            # Tolerate either "id" or "scene_id" from the model; otherwise
            # fall back to a running count.
            scene_id = scene.get('id') or scene.get('scene_id') or len(scenes) + 1

            # Generate images using Fal API
            generated_images = generate_images_from_fal(scene['keywords'], scene_id)

            # Generate audio
            generate_audio_from_text(scene['script'], scene_id)

            # Create video from generated images
            video_filename = create_video_from_images(generated_images, scene_id)

            if video_filename:
                scenes.append((video_filename, f"scene_{scene_id}.mp3"))
                print(f"Created video for scene {scene_id}: {video_filename}")
            else:
                print(f"Could not create video for scene {scene_id}")

        # Reformat each scene to vertical, attach its narration, then stitch.
        merged_clips = []
        for video_filename, audio_filename in scenes:
            resized_video = reformat_video(video_filename)
            merged_clip = merge_audio_video(audio_filename, resized_video)
            merged_clips.append(VideoFileClip(merged_clip))

        # Concatenate all the clips into the final Short.
        final_clip = concatenate_videoclips(merged_clips, method="compose")
        final_clip.write_videofile("final_youtube_short.mp4", codec="libx264", audio_codec="aac")
        final_clip.close()

        # Bug fix: close each reader BEFORE deleting its backing file
        # (the original removed files while the clips were still open).
        for clip in merged_clips:
            filename = clip.filename
            clip.close()
            os.remove(filename)
else:
    print("Please provide a topic as an argument")