├── .gitignore ├── .env.example ├── requirements.txt ├── requirements-core.txt ├── install.sh ├── format_video.py ├── readme.md └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.mp4 3 | *.mp3 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Copy this file to .env and fill in your API keys 2 | 3 | # OpenAI API Key 4 | OPEN_API_KEY="" 5 | 6 | # Fal AI API Key (for image generation with Flux Schnell) 7 | FAL_API_KEY="" 8 | 9 | # ElevenLabs API Key (for voice generation) 10 | ELVEN_LABS_API_KEY="" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # YouTube Shorts Generator - Core Dependencies 2 | # Tested and working versions as of September 2025 3 | 4 | # Core libraries 5 | requests==2.32.5 6 | python-dotenv==1.1.1 7 | 8 | # OpenAI for script generation 9 | openai==1.102.0 10 | 11 | # MoviePy for video processing (specific version for PIL compatibility) 12 | moviepy==1.0.3 13 | decorator==4.4.2 # Required by moviepy 1.0.3 14 | 15 | # Video/Image processing dependencies 16 | imageio==2.37.0 17 | imageio-ffmpeg==0.6.0 18 | numpy==2.3.2 19 | pillow==11.3.0 # Latest version with our compatibility fix 20 | 21 | # Progress bar for video processing 22 | tqdm==4.67.1 23 | proglog==0.1.12 24 | 25 | # Additional moviepy dependencies 26 | scipy==1.16.0 # Required for some moviepy operations 27 | 28 | # HTTP client libraries (for API calls) 29 | certifi==2025.8.3 30 | charset-normalizer==3.4.3 31 | idna==3.10 32 | urllib3==2.5.0 33 | 34 | # OpenAI dependencies 35 | pydantic==2.11.7 36 | pydantic-core==2.33.2 37 | distro==1.9.0 38 | jiter==0.10.0 39 | anyio==4.9.0 40 | httpcore==1.0.9 41 | httpx==0.28.1 42 | sniffio==1.3.1 43 
| typing-extensions==4.14.1 44 | annotated-types==0.7.0 45 | typing-inspection==0.4.1 46 | -------------------------------------------------------------------------------- /requirements-core.txt: -------------------------------------------------------------------------------- 1 | # YouTube Shorts Generator - Core Dependencies 2 | # Tested and working versions as of September 2025 3 | 4 | # Core libraries 5 | requests==2.32.5 6 | python-dotenv==1.1.1 7 | 8 | # OpenAI for script generation 9 | openai==1.102.0 10 | 11 | # MoviePy for video processing (specific version for PIL compatibility) 12 | moviepy==1.0.3 13 | decorator==4.4.2 # Required by moviepy 1.0.3 14 | 15 | # Video/Image processing dependencies 16 | imageio==2.37.0 17 | imageio-ffmpeg==0.6.0 18 | numpy==2.3.2 19 | pillow==11.3.0 # Latest version with our compatibility fix 20 | 21 | # Progress bar for video processing 22 | tqdm==4.67.1 23 | proglog==0.1.12 24 | 25 | # Additional moviepy dependencies 26 | scipy==1.16.0 # Required for some moviepy operations 27 | 28 | # HTTP client libraries (for API calls) 29 | certifi==2025.8.3 30 | charset-normalizer==3.4.3 31 | idna==3.10 32 | urllib3==2.5.0 33 | 34 | # OpenAI dependencies 35 | pydantic==2.11.7 36 | pydantic-core==2.33.2 37 | distro==1.9.0 38 | jiter==0.10.0 39 | anyio==4.9.0 40 | httpcore==1.0.9 41 | httpx==0.28.1 42 | sniffio==1.3.1 43 | typing-extensions==4.14.1 44 | annotated-types==0.7.0 45 | typing-inspection==0.4.1 46 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # YouTube Shorts Generator - Installation Script 4 | # This script sets up the environment with locked dependency versions 5 | 6 | echo "🎬 YouTube Shorts Generator - Setup Script" 7 | echo "==========================================" 8 | 9 | # Check if Python is installed 10 | if ! 
#!/bin/bash

# YouTube Shorts Generator - Installation Script
# Sets up the environment with locked dependency versions.

echo "🎬 YouTube Shorts Generator - Setup Script"
echo "=========================================="

# Abort with the supplied message unless the named command is on PATH.
require_tool() {
    if ! command -v "$1" &> /dev/null; then
        echo "$2"
        exit 1
    fi
}

require_tool python "❌ Python is not installed. Please install Python 3.8+ first."
echo "✅ Python found: $(python --version)"

require_tool pip "❌ pip is not installed. Please install pip first."
echo "✅ pip found: $(pip --version)"

echo ""
echo "📦 Installing locked dependencies..."
echo "This may take a few minutes..."

if pip install -r requirements.txt; then
    echo ""
    echo "✅ All dependencies installed successfully!"
    echo ""
    echo "🔧 Next steps:"
    echo "1. Copy .env.example to .env"
    echo "2. Fill in your API keys in the .env file:"
    echo "   - OPEN_API_KEY (OpenAI)"
    echo "   - FAL_API_KEY (Fal AI)"
    echo "   - ELVEN_LABS_API_KEY (ElevenLabs)"
    echo "3. Run: python main.py \"Your topic here\""
    echo ""
    echo "📖 See readme.md for more detailed instructions."
else
    echo ""
    echo "❌ Installation failed. Please check the error messages above."
    exit 1
fi
def reformat_video(video_filename):
    """Center-crop and resize a video to 1080x1920 (vertical Shorts format).

    Audio is stripped; the result is written next to the input as
    ``vertical_<video_filename>``.

    Args:
        video_filename: path to the source video.

    Returns:
        The new filename on success, or None if the file is missing or
        processing fails.
    """
    if not os.path.exists(video_filename):
        print(f"File not found: {video_filename}")
        return None

    clip = None
    resized_clip = None
    try:
        clip = VideoFileClip(video_filename)
        clip = clip.without_audio()  # Remove audio

        target_width, target_height = 1080, 1920  # Vertical video resolution
        original_width, original_height = clip.size
        aspect_ratio = target_width / target_height

        # Keep the 9:16 aspect ratio by cropping the longer dimension.
        if original_width / original_height > aspect_ratio:
            # Source is too wide: crop the sides.
            new_height = original_height
            new_width = int(new_height * aspect_ratio)
            x1 = (original_width - new_width) // 2
            y1 = 0
        else:
            # Source is too tall: crop top and bottom.
            new_width = original_width
            new_height = int(new_width / aspect_ratio)
            x1 = 0
            y1 = (original_height - new_height) // 2

        x2 = x1 + new_width
        y2 = y1 + new_height

        resized_clip = clip.crop(x1=x1, y1=y1, x2=x2, y2=y2).resize((target_width, target_height))
        resized_filename = f"vertical_{video_filename}"
        resized_clip.write_videofile(resized_filename, codec="libx264", audio_codec="aac")
        print(f"Vertical resized video created: {resized_filename}")
        return resized_filename
    except Exception as e:
        print(f"Error processing file {video_filename}: {e}")
        return None
    finally:
        # Bug fix: the original closed only resized_clip, and nothing on the
        # exception path — release both ffmpeg readers unconditionally.
        for c in (resized_clip, clip):
            if c is not None:
                try:
                    c.close()
                except Exception:
                    pass
def merge_audio_video(audio_filename, video_filename):
    """Attach a voiceover track to a video, looping the video if needed.

    The output is trimmed to exactly the audio's duration and written as
    ``merged_<basename of video_filename>`` in the current directory.

    Args:
        audio_filename: path to the audio track.
        video_filename: path to the (silent) video; only its basename is used,
            so the file is expected to be in the current directory.

    Returns:
        The merged video's filename.
    """
    video_filename = os.path.basename(video_filename)
    video_clip = VideoFileClip(video_filename)
    audio_clip = AudioFileClip(audio_filename)
    try:
        # Loop the video clip if it's shorter than the audio clip.
        if video_clip.duration < audio_clip.duration:
            loops_required = math.ceil(audio_clip.duration / video_clip.duration)
            video_clip = concatenate_videoclips([video_clip] * loops_required)
        # Attach the narration, then trim the video to the audio's length.
        video_clip = video_clip.set_audio(audio_clip)
        video_clip = video_clip.subclip(0, audio_clip.duration)

        merged_filename = f"merged_{video_filename}"
        video_clip.write_videofile(merged_filename, codec="libx264", audio_codec="aac")
        return merged_filename
    finally:
        # Bug fix: the original leaked both readers; close them once done.
        audio_clip.close()
        video_clip.close()
**Install Dependencies:** Ensure Python 3.x is installed, then run: 30 | ```bash 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | *Note: We use locked versions to ensure compatibility. The `requirements.txt` includes specific versions that have been tested and confirmed working together.* 35 | 36 | 2. **Environment Variables:** Set up `.env` file with API keys: 37 | - `OPEN_API_KEY` for OpenAI. 38 | - `FAL_API_KEY` for Fal AI. 39 | - `ELVEN_LABS_API_KEY` for ElevenLabs. 40 | 41 | ## Usage 42 | Run the script with a topic as an argument: `python main.py "Your Topic"`. 43 | 44 | ### Script Workflow: 45 | 1. **Topic Input:** Accepts a topic for the YouTube Short. 46 | 2. **Scenario Generation:** Uses GPT-4o-2024-05-13 to generate a video script based on the topic. 47 | 3. **Image Generation:** Creates relevant images using Fal AI with Flux Schnell model. 48 | 4. **Audio Generation:** Creates a voiceover using ElevenLabs API. 49 | 5. **Video Assembly:** Merges audio and generated images into video clips, and outputs the final video. 50 | 51 | ## Functions Overview 52 | - `generate_audio_from_text(text, scene_id, voice_id)`: Generates a voiceover for a given text. 53 | - `generate_images_from_fal(keywords, scene_id)`: Generates images using Fal AI with Flux Schnell model based on provided keywords. 54 | - `create_video_from_images(images, scene_id, duration)`: Creates a video from generated images. 55 | - `generate_scenario(topic)`: Generates the script and scene descriptions using GPT-4o-2024-05-13. 56 | 57 | ## Limitations & Notes 58 | - **API Quotas:** Be aware of rate limits and quotas for OpenAI, Fal AI, and ElevenLabs APIs. 59 | - **Image Quality:** The quality of the final video depends on the generated images and TTS quality. 60 | - **Error Handling:** The script includes basic error handling, which can be expanded for robustness. 
61 | 62 | ## Future Enhancements 63 | - **Customization Options:** Adding more options for voice and style customization in voiceovers. 64 | - **Improved Error Handling:** Enhance error detection and handling for more stability. 65 | - **User Interface:** Develop a GUI for easier interaction and topic input. 66 | - **Image Style Control:** Add options to control the style and theme of generated images. 67 | 68 | ## Recent Updates 69 | - **✅ Updated to GPT-4o-2024-05-13** for improved script generation 70 | - **✅ Replaced Pexels with Fal AI + Flux Schnell** for high-quality image generation 71 | - **✅ Added automatic video creation** from generated images 72 | - **✅ Fixed compatibility issues** between MoviePy and newer Pillow versions 73 | - **✅ Locked dependency versions** in `requirements.txt` to prevent compatibility issues 74 | 75 | ## Important Notes 76 | - **Dependency Versions:** This project uses specific library versions (see `requirements.txt`) that have been tested for compatibility. Using different versions may cause issues, particularly with MoviePy and Pillow. 77 | - **PIL Compatibility:** The code includes a compatibility fix for newer Pillow versions with older MoviePy. 78 | - **System Dependencies:** Make sure you have the required system libraries for image processing (automatically installed in most environments). 79 | 80 | ## Contributing 81 | Contributions to improve or extend the script are welcome. Please adhere to standard coding practices and provide documentation for any changes. 
def generate_audio_from_text(text, scene_id, voice_id="21m00Tcm4TlvDq8ikWAM"):
    """Generate an MP3 voiceover for one scene via the ElevenLabs TTS API.

    Args:
        text: the narration to synthesize.
        scene_id: identifies the scene; output is saved as scene_<id>.mp3.
        voice_id: ElevenLabs voice to use (default is the "Bella" voice).

    Returns:
        The output filename on success, None on failure (error is printed).
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    payload = {
        "model_id": "eleven_multilingual_v2",
        # NOTE(review): the endpoint selects the voice via voice_id in the
        # URL; this extra "voice" field looks redundant — confirm against the
        # ElevenLabs API docs before removing it.
        "voice": "Bella",
        "text": text,
        "voice_settings": {
            "similarity_boost": 0.5,
            "stability": 0.5,
        }
    }
    headers = {
        "xi-api-key": f"{elevenlabs_api_key}",
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        # Save the returned audio bytes for this scene.
        filename = f"scene_{scene_id}.mp3"
        with open(filename, "wb") as f:
            f.write(response.content)
        return filename
    print(f"Failed to generate audio for scene {scene_id}: {response.text}")
    return None
def generate_images_from_fal(keywords, scene_id):
    """Generate one image per comma-separated keyword using the Fal API
    (Flux Schnell model) and download each result.

    Args:
        keywords: comma-separated keyword string (empty entries are skipped).
        scene_id: used in the output filenames scene_<id>_image_<i>.jpg.

    Returns:
        List of downloaded image filenames (possibly empty on failures).
    """
    keywords_list = keywords.split(',')
    generated_images = []

    for i, keyword in enumerate(keywords_list):
        keyword = keyword.strip()
        if not keyword:
            continue

        # Create a more detailed prompt for better image generation.
        prompt = f"High quality, cinematic, professional photograph of {keyword}, detailed, realistic, good lighting"

        url = "https://fal.run/fal-ai/flux/schnell"

        payload = {
            "prompt": prompt,
            "image_size": "portrait_16_9",
            "num_inference_steps": 4,
            "enable_safety_checker": True
        }

        headers = {
            "Authorization": f"Key {fal_api_key}",
            "Content-Type": "application/json"
        }

        try:
            # Timeouts keep one stuck API call from hanging the pipeline;
            # a timeout raises and is reported by the except below.
            response = requests.post(url, json=payload, headers=headers, timeout=120)

            if response.status_code == 200:
                result = response.json()
                if 'images' in result and len(result['images']) > 0:
                    image_url = result['images'][0]['url']

                    # Download the generated image.
                    image_response = requests.get(image_url, timeout=120)
                    if image_response.status_code == 200:
                        filename = f'scene_{scene_id}_image_{i}.jpg'
                        with open(filename, 'wb') as f:
                            f.write(image_response.content)
                        generated_images.append(filename)
                        # Bug fix: original printed a literal "(unknown)"
                        # placeholder instead of the saved filename.
                        print(f"Generated image for '{keyword}': {filename}")
                    else:
                        print(f"Failed to download image for '{keyword}'")
                else:
                    print(f"No images generated for '{keyword}'")
            else:
                print(f"Failed to generate image for '{keyword}': {response.text}")

        except Exception as e:
            print(f"Error generating image for '{keyword}': {str(e)}")

    return generated_images
def create_video_from_images(images, scene_id, duration=5):
    """Build a scene video by concatenating still images.

    Each image gets an equal share of ``duration`` seconds. The source image
    files are deleted once the video is written.

    Args:
        images: filenames of the stills (missing files are skipped).
        scene_id: used in the output filename scene_<id>_video.mp4.
        duration: total video length in seconds.

    Returns:
        The video filename on success, None if no usable images or on error.
    """
    if not images:
        print(f"No images available for scene {scene_id}")
        return None

    try:
        # Equal time per supplied image; missing files still count toward the
        # split (matches original behavior), so the total may come out short.
        clip_duration = duration / len(images)

        video_clips = [
            ImageClip(path, duration=clip_duration).resize((1080, 1920))  # Portrait for Shorts
            for path in images
            if os.path.exists(path)
        ]

        if not video_clips:
            print(f"No valid images found for scene {scene_id}")
            return None

        final_clip = concatenate_videoclips(video_clips, method="compose")
        video_filename = f'scene_{scene_id}_video.mp4'
        final_clip.write_videofile(video_filename, codec="libx264", fps=24)
        final_clip.close()
        # Release the per-image readers (original leaked them), then clean up
        # the stills now that they are baked into the video.
        for clip in video_clips:
            clip.close()
        for path in images:
            if os.path.exists(path):
                os.remove(path)

        print(f"Created video from images: {video_filename}")
        return video_filename

    except Exception as e:
        print(f"Error creating video from images for scene {scene_id}: {str(e)}")
        return None
def generate_scenario(topic):
    """Ask GPT-4o for a scene-by-scene Shorts script via function calling.

    Args:
        topic: the subject of the video.

    Returns:
        The function-call arguments as a JSON string of the form
        ``{"scenes": [{"id", "script", "keywords"}, ...]}`` — callers parse
        it with json.loads — or None on any failure.
    """
    client = OpenAI(
        # This is the default and can be omitted
        api_key=openai_api_key
    )

    # Function schema that forces the model to return structured scenes.
    tools = [
        {
            "name": "get_scenes",
            "description": "Get the scenes for a video voiceover script without scene descriptions",
            "parameters": {
                "type": "object",
                "properties": {
                    "scenes": {
                        "type": "array",
                        "description": "The scenes for the video voiceover script",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "integer"},
                                "script": {"type": "string", "description": "The script for the scene.MUST include only the text that will be spoken by the narrator"},
                                "keywords": {"type": "string", "description": "Relevant keywords for image generation using AI, describe visual elements that would complement the narration"},
                            },
                        },
                    },
                },
            }
        }
    ]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-2024-05-13",
            messages=[
                {"role": "system", "content": "You are youtube creator. You are creating a 1mn video script for a given topic"},
                {"role": "user", "content": topic},
            ],
            functions=tools,
            function_call={
                "name": "get_scenes",
            },
        )
        arguments = response.choices[0].message.function_call.arguments
        # Validate that the model returned well-formed JSON before handing it
        # back (the original parsed it and discarded the result; keep the
        # parse purely as validation so malformed output fails here).
        json.loads(arguments)
        return arguments
    except Exception as e:
        print(e)
        return None
# CLI entry point: python main.py "Your topic here"
if len(sys.argv) > 1:
    topic = sys.argv[1]
    response = generate_scenario(topic)
    if response:
        json_response = json.loads(response)

        for scene in json_response['scenes']:
            # Tolerate either "id" or "scene_id" from the model; otherwise
            # fall back to a running count.
            scene_id = scene.get('id') or scene.get('scene_id') or len(scenes) + 1

            # Generate images using Fal API
            generated_images = generate_images_from_fal(scene['keywords'], scene_id)

            # Generate audio
            generate_audio_from_text(scene['script'], scene_id)

            # Create video from generated images
            video_filename = create_video_from_images(generated_images, scene_id)

            if video_filename:
                scenes.append((video_filename, f"scene_{scene_id}.mp3"))
                print(f"Created video for scene {scene_id}: {video_filename}")
            else:
                print(f"Could not create video for scene {scene_id}")

        # Reformat each scene to vertical, attach its narration, then stitch.
        merged_clips = []
        for video_filename, audio_filename in scenes:
            resized_video = reformat_video(video_filename)
            merged_clip = merge_audio_video(audio_filename, resized_video)
            merged_clips.append(VideoFileClip(merged_clip))

        # Concatenate all the clips into the final Short.
        final_clip = concatenate_videoclips(merged_clips, method="compose")
        final_clip.write_videofile("final_youtube_short.mp4", codec="libx264", audio_codec="aac")
        final_clip.close()

        # Bug fix: close each reader BEFORE deleting its backing file
        # (the original removed files while the clips were still open).
        for clip in merged_clips:
            filename = clip.filename
            clip.close()
            os.remove(filename)
else:
    print("Please provide a topic as an argument")