├── .env.example ├── .gitignore ├── Backend ├── __pycache__ │ ├── elevenvoice.cpython-310.pyc │ ├── gpt.cpython-310.pyc │ ├── search.cpython-310.pyc │ ├── tiktokvoice.cpython-310.pyc │ ├── utils.cpython-310.pyc │ └── video.cpython-310.pyc ├── elevenvoice.py ├── gpt.py ├── main.py ├── search.py ├── tiktokvoice.py ├── utils.py └── video.py ├── Frontend ├── index.html └── public │ └── videos │ └── .gitkeep ├── LICENSE ├── README.md ├── fonts └── bold_font.ttf └── requirements.txt /.env.example: -------------------------------------------------------------------------------- 1 | ASSEMBLY_AI_API_KEY="" # For the transcription of the audio 2 | TIKTOK_SESSION_ID="" # If you want to use the TikTok API for the TTS 3 | ELEVENLABS_API_KEY="" # If you want to use the ElevenLabs API for the TTS 4 | IMAGEMAGICK_BINARY="" # Video processing 5 | PEXELS_API_KEY="" # Getting the assets -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | temp/* 3 | Frontend/public/videos/*.mp4 4 | subtitles/* -------------------------------------------------------------------------------- /Backend/__pycache__/elevenvoice.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/elevenvoice.cpython-310.pyc -------------------------------------------------------------------------------- /Backend/__pycache__/gpt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/gpt.cpython-310.pyc -------------------------------------------------------------------------------- /Backend/__pycache__/search.cpython-310.pyc: 
def tts(
    text: str,
    voice: str = "none",
    filename: str = "output.mp3",
    max_retries: int = 50,
    retry_delay: float = 5.0,
):
    """
    Generate speech for a text with an ElevenLabs voice and save it to a file.

    Args:
        text (str): The text to synthesize.
        voice (str): Name of the voice; must be one of the names in
            ``voices_list`` below.
        filename (str): Path of the audio file to write.
        max_retries (int): How many times generation is attempted when the
            API raises ``APIError``.
        retry_delay (float): Seconds to wait between two attempts.

    Returns:
        The path of the written file on success, or ``None`` when the voice
        is invalid, cannot be found, or every attempt failed. Callers that
        ignore the return value keep working as before.
    """
    # Only these voice names are accepted
    voices_list = ["Paddington", "DanDan", "Sally", "Aaryan", "Eleguar", "Readwell", "Knightley"]
    if voice not in voices_list:
        print("Invalid voice id. Please choose from:", voices_list)
        return None

    # Find the corresponding voice object among the voices the API returns
    voice_obj = next((v for v in voices() if v.name == voice), None)
    if not voice_obj:
        print("Voice not found.")
        return None

    for attempt in range(1, max_retries + 1):
        try:
            print(f'Generating audio for {voice}... {text}')
            audio = generate(text=text, voice=voice_obj, model="eleven_multilingual_v1")
        except elevenlabs.api.error.APIError as e:
            print(f"Error: {e}")
            if attempt == max_retries:
                # No point in sleeping when there is no attempt left
                break
            print("Retrying...")
            time.sleep(retry_delay)
            continue

        with open(filename, 'wb') as f:
            f.write(audio)
        print(f"Audio saved to {filename}")
        return filename

    print("Maximum retries reached. Skipping this message.")
    return None
def _parse_search_terms(response: str) -> List[str]:
    """
    Turn a raw model response into a list of search-term strings.

    The response is parsed as JSON first. If that fails, the first
    ``[...]`` span found in the text is parsed instead.

    Raises:
        ValueError: If no JSON array of strings can be extracted.
    """
    try:
        parsed = json.loads(response)
    except (TypeError, ValueError):
        print(colored("[*] GPT returned an unformatted response. Attempting to clean...", "yellow"))

        # DOTALL lets the array span several lines of the response
        match = re.search(r"\[.*?\]", response or "", re.DOTALL)
        if match is None:
            raise ValueError("The response does not contain a JSON array.")
        parsed = json.loads(match.group(0))

    if not isinstance(parsed, list) or not all(isinstance(term, str) for term in parsed):
        raise ValueError("The response is not a JSON array of strings.")

    return parsed


def get_search_terms(video_subject: str, amount: int, script: str, max_attempts: int = 5) -> List[str]:
    """
    Generate a JSON-Array of search terms for stock videos,
    depending on the subject of a video.

    Args:
        video_subject (str): The subject of the video.
        amount (int): The amount of search terms to generate.
        script (str): The script of the video.
        max_attempts (int): How many times the model is queried before
            giving up on an unusable response.

    Returns:
        List[str]: The search terms for the video subject.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If no attempt produced a JSON array of strings.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # Build prompt
    prompt = f"""
    Generate {amount} search terms for stock videos,
    depending on the subject of a video. Only stick to the subject of the video and don't go off-topic.
    For example, if the video is about "How to make a cake", the search terms should be "cake", "baking", "cooking", etc.
    They need to be single words or short phrases.
    Subject: {video_subject}

    The search terms are to be returned as
    a JSON-Array of strings.

    Each search term should consist of 1-3 words,
    always add the main subject of the video.

    Here is an example of a JSON-Array of strings:
    ["search term 1", "search term 2", "search term 3"]

    Obviously, the search terms should be related
    to the subject of the video.

    ONLY RETURN THE JSON-ARRAY OF STRINGS.
    DO NOT RETURN ANYTHING ELSE.

    For context, here is the full text:
    {script}
    """

    # A bounded loop replaces the former unbounded recursive retry
    for _ in range(max_attempts):
        # Generate search terms
        response = g4f.ChatCompletion.create(
            model=g4f.models.gpt_35_turbo_16k_0613,
            messages=[{"role": "user", "content": prompt}],
        )

        print(response)

        try:
            search_terms = _parse_search_terms(response)
        except ValueError:
            print(colored("[*] Could not clean the response. Attempting to generate search terms again...", "yellow"))
            continue

        # Let user know how many terms were actually returned
        print(colored(f"\nGenerated {len(search_terms)} search terms: {', '.join(search_terms)}", "cyan"))

        # Return search terms
        return search_terms

    raise RuntimeError(f"Could not generate search terms after {max_attempts} attempts.")
"status": "error", 54 | "message": "Invalid voice.", 55 | "data": [], 56 | } 57 | ) 58 | 59 | # Generate a script 60 | script = generate_script(data["videoSubject"]) 61 | 62 | # Remove *, #, and other special characters from the script 63 | script = remove_special_characters(script) 64 | 65 | # Generate search terms 66 | search_terms = get_search_terms( 67 | data["videoSubject"], AMOUNT_OF_STOCK_VIDEOS, script 68 | ) 69 | 70 | # Search for a video of the given search term 71 | video_urls = [] 72 | 73 | # Loop through all search terms, 74 | # and search for a video of the given search term 75 | for search_term in search_terms: 76 | found_url = search_for_stock_videos( 77 | search_term, os.getenv("PEXELS_API_KEY") 78 | ) 79 | 80 | if found_url != None and found_url not in video_urls and found_url != "": 81 | video_urls.append(found_url) 82 | 83 | # Define video_paths 84 | video_paths = [] 85 | 86 | # Let user know 87 | print(colored("[+] Downloading videos...", "blue")) 88 | 89 | # Save the videos 90 | for video_url in video_urls: 91 | try: 92 | saved_video_path = save_video(video_url) 93 | video_paths.append(saved_video_path) 94 | except: 95 | print(colored("[-] Could not download video: " + video_url, "red")) 96 | 97 | # Let user know 98 | print(colored("[+] Videos downloaded!", "green")) 99 | 100 | # Let user know 101 | print(colored("[+] Script generated!\n\n", "green")) 102 | 103 | print(colored(f"\t{script}", "light_cyan")) 104 | 105 | # Split script into sentences 106 | sentences = script.split(". 
") 107 | # Remove empty strings 108 | sentences = list(filter(lambda x: x != "", sentences)) 109 | paths = [] 110 | # Generate TTS for every sentence 111 | for sentence in sentences: 112 | current_tts_path = f"../temp/{uuid4()}.mp3" 113 | 114 | eleven_tts(sentence, 115 | voice=eleven_voice, 116 | filename=current_tts_path) 117 | 118 | # tiktok_tts(sentence, 119 | # voice="en_us_006", 120 | # filename=current_tts_path) 121 | 122 | audio_clip = AudioFileClip(current_tts_path) 123 | paths.append(audio_clip) 124 | 125 | 126 | # Combine all TTS files using moviepy 127 | final_audio = concatenate_audioclips(paths) 128 | tts_path = f"../temp/{uuid4()}.mp3" 129 | final_audio.write_audiofile(tts_path) 130 | audio_clip.close() 131 | 132 | # Generate subtitles 133 | subtitles_path = generate_subtitles(tts_path) 134 | 135 | # Concatenate videos 136 | temp_audio = AudioFileClip(tts_path) 137 | combined_video_path = combine_videos(video_paths, temp_audio.duration) 138 | temp_audio.close() 139 | 140 | # Put everything together 141 | final_video_path = generate_video(combined_video_path, tts_path, subtitles_path) 142 | 143 | # Let user know 144 | print(colored("[+] Video generated!", "green")) 145 | 146 | print(colored(f"[+] Path: {final_video_path}", "green")) 147 | 148 | # Return JSON 149 | return jsonify( 150 | { 151 | "status": "success", 152 | "message": "Video generated!", 153 | "videoUrl": final_video_path, 154 | } 155 | ) 156 | except Exception as err: 157 | print(colored("[-] Error: " + str(err), "red")) 158 | return jsonify( 159 | { 160 | "status": "error", 161 | "message": f"Could not generate video: {str(err)}", 162 | "videoUrl": [], 163 | } 164 | ) 165 | 166 | 167 | def remove_special_characters(script: str) -> str: 168 | """ 169 | Remove special characters from a script. 170 | 171 | Args: 172 | script (str): The script to clean. 173 | 174 | Returns: 175 | str: The cleaned script. 
def search_for_stock_videos(query: str, api_key: str, timeout: float = 30.0) -> str:
    """
    Searches Pexels for a stock video matching a query.

    Args:
        query (str): The query to search for.
        api_key (str): The API key sent in the ``Authorization`` header.
        timeout (float): Seconds to wait for the API before giving up.

    Returns:
        str: The link of a matching video file, or an empty string when the
        search returned no video or no file link contains ".com/external".

    Raises:
        requests.RequestException: If the request fails, times out or the
            API answers with an HTTP error status.
    """
    # Passing the query through `params` lets requests escape characters
    # such as "&" or "#" that would otherwise corrupt the URL
    r = requests.get(
        "https://api.pexels.com/videos/search",
        params={"query": query, "per_page": 1},
        headers={"Authorization": api_key},
        timeout=timeout,
    )
    r.raise_for_status()

    # Parse the response
    videos = r.json().get("videos") or []
    if not videos:
        # An empty result is not an error; the caller skips empty strings
        print(colored(f"\t=> No stock video found for '{query}'", "yellow"))
        return ""

    # As before, the last file whose link has a download URL wins
    video_url = ""
    for video_file in videos[0].get("video_files", []):
        link = video_file.get("link", "")
        if ".com/external" in link:
            video_url = link

    # Let user know
    print(colored(f"\t=>{video_url}", "light_cyan"))

    # Return the video url
    return video_url
"en_us_chewbacca", # Chewbacca 14 | "en_us_c3po", # C3PO 15 | "en_us_stitch", # Stitch 16 | "en_us_stormtrooper", # Stormtrooper 17 | "en_us_rocket", # Rocket 18 | # ENGLISH VOICES 19 | "en_au_001", # English AU - Female 20 | "en_au_002", # English AU - Male 21 | "en_uk_001", # English UK - Male 1 22 | "en_uk_003", # English UK - Male 2 23 | "en_us_001", # English US - Female (Int. 1) 24 | "en_us_002", # English US - Female (Int. 2) 25 | "en_us_006", # English US - Male 1 26 | "en_us_007", # English US - Male 2 27 | "en_us_009", # English US - Male 3 28 | "en_us_010", # English US - Male 4 29 | # EUROPE VOICES 30 | "fr_001", # French - Male 1 31 | "fr_002", # French - Male 2 32 | "de_001", # German - Female 33 | "de_002", # German - Male 34 | "es_002", # Spanish - Male 35 | # AMERICA VOICES 36 | "es_mx_002", # Spanish MX - Male 37 | "br_001", # Portuguese BR - Female 1 38 | "br_003", # Portuguese BR - Female 2 39 | "br_004", # Portuguese BR - Female 3 40 | "br_005", # Portuguese BR - Male 41 | # ASIA VOICES 42 | "id_001", # Indonesian - Female 43 | "jp_001", # Japanese - Female 1 44 | "jp_003", # Japanese - Female 2 45 | "jp_005", # Japanese - Female 3 46 | "jp_006", # Japanese - Male 47 | "kr_002", # Korean - Male 1 48 | "kr_003", # Korean - Female 49 | "kr_004", # Korean - Male 2 50 | # SINGING VOICES 51 | "en_female_f08_salut_damour", # Alto 52 | "en_male_m03_lobby", # Tenor 53 | "en_female_f08_warmy_breeze", # Warmy Breeze 54 | "en_male_m03_sunshine_soon", # Sunshine Soon 55 | # OTHER 56 | "en_male_narration", # narrator 57 | "en_male_funny", # wacky 58 | "en_female_emotional", # peaceful 59 | ] 60 | 61 | ENDPOINTS = [ 62 | "https://tiktok-tts.weilnet.workers.dev/api/generation", 63 | "https://tiktoktts.com/api/tiktok-tts", 64 | ] 65 | current_endpoint = 0 66 | # in one conversion, the text can have a maximum length of 300 characters 67 | TEXT_BYTE_LIMIT = 300 68 | 69 | 70 | # create a list by splitting a string, every element has n chars 71 | def 
# create a list by splitting a string, every element has at most n chars
def split_string(string: str, chunk_size: int) -> list[str]:
    """
    Split a text on whitespace into chunks of at most ``chunk_size`` characters.

    Words are packed greedily and joined with single spaces. A word that is
    longer than ``chunk_size`` on its own is cut into pieces of
    ``chunk_size`` characters, so no returned chunk exceeds the limit.

    Args:
        string (str): The text to split.
        chunk_size (int): Maximum length of every chunk; must be positive.

    Returns:
        list[str]: The chunks in their original order; empty for blank input.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    current_chunk = ""
    for word in string.split():
        # A single word that cannot fit in any chunk is hard-split
        while len(word) > chunk_size:
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = ""
            chunks.append(word[:chunk_size])
            word = word[chunk_size:]

        if not current_chunk:
            # The first word of a chunk needs no separator
            current_chunk = word
        elif len(current_chunk) + 1 + len(word) <= chunk_size:
            current_chunk += " " + word
        else:
            chunks.append(current_chunk)
            current_chunk = word

    if current_chunk:  # Append the last chunk if not empty
        chunks.append(current_chunk)
    return chunks
rate limited, try again later..." 131 | ) 132 | return 133 | 134 | # checking if arguments are valid 135 | if voice == "none": 136 | print("No voice has been selected") 137 | return 138 | 139 | if not voice in VOICES: 140 | print("Voice does not exist") 141 | return 142 | 143 | if len(text) == 0: 144 | print("Insert a valid text") 145 | return 146 | 147 | # creating the audio file 148 | try: 149 | if len(text) < TEXT_BYTE_LIMIT: 150 | audio = generate_audio((text), voice) 151 | if current_endpoint == 0: 152 | audio_base64_data = str(audio).split('"')[5] 153 | else: 154 | audio_base64_data = str(audio).split('"')[3].split(",")[1] 155 | 156 | if audio_base64_data == "error": 157 | print("This voice is unavailable right now") 158 | return 159 | 160 | else: 161 | # Split longer text into smaller parts 162 | text_parts = split_string(text, 299) 163 | audio_base64_data = [None] * len(text_parts) 164 | 165 | # Define a thread function to generate audio for each text part 166 | def generate_audio_thread(text_part, index): 167 | audio = generate_audio(text_part, voice) 168 | if current_endpoint == 0: 169 | base64_data = str(audio).split('"')[5] 170 | else: 171 | base64_data = str(audio).split('"')[3].split(",")[1] 172 | 173 | if audio_base64_data == "error": 174 | print("This voice is unavailable right now") 175 | return "error" 176 | 177 | audio_base64_data[index] = base64_data 178 | 179 | threads = [] 180 | for index, text_part in enumerate(text_parts): 181 | # Create and start a new thread for each text part 182 | thread = threading.Thread( 183 | target=generate_audio_thread, args=(text_part, index) 184 | ) 185 | thread.start() 186 | threads.append(thread) 187 | 188 | # Wait for all threads to complete 189 | for thread in threads: 190 | thread.join() 191 | 192 | # Concatenate the base64 data in the correct order 193 | audio_base64_data = "".join(audio_base64_data) 194 | 195 | save_audio_file(audio_base64_data, filename) 196 | print(f"Audio file saved successfully as 
def clean_dir(path: str) -> None:
    """
    Removes every file in a directory, creating the directory if needed

    Missing parent directories are created as well. Sub-directories are
    left untouched and reported instead of aborting the clean-up.

    Args:
        path (str): Path to directory

    Returns:
        None
    """
    # exist_ok avoids the race between checking for and creating the directory
    os.makedirs(path, exist_ok=True)

    # Collect the entries first so nothing is deleted while scanning
    with os.scandir(path) as scanner:
        entries = list(scanner)

    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            # os.remove cannot delete directories
            print(colored(f"[*] Skipping sub-directory {entry.path}", "yellow"))
            continue
        os.remove(entry.path)

    print(colored(f"[+] Cleaned {path} directory", "green"))
def generate_subtitles(audio_path: str, max_chars: int = 10) -> str:
    """
    Generates subtitles from a given audio file and returns the path to the subtitles.

    The audio is transcribed with AssemblyAI, exported as SRT into
    ``../subtitles`` and then equalized in place with ``srt_equalizer``.

    Args:
        audio_path (str): The path to the audio file to generate subtitles from.
        max_chars (int): The character limit passed to
            ``srt_equalizer.equalize_srt_file``.

    Returns:
        str: The path to the generated subtitles.
    """
    aai.settings.api_key = ASSEMBLY_AI_API_KEY

    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_path)
    subtitles = transcript.export_subtitles_srt()

    # Do not rely on another step having created the directory
    subtitles_dir = "../subtitles"
    os.makedirs(subtitles_dir, exist_ok=True)
    subtitles_path = f"{subtitles_dir}/{uuid.uuid4()}.srt"

    # Save subtitles; an explicit encoding keeps non-ASCII text from failing
    # on platforms whose default encoding is not UTF-8
    with open(subtitles_path, "w", encoding="utf-8") as f:
        f.write(subtitles)

    # Equalize subtitles (source and destination are the same file)
    srt_equalizer.equalize_srt_file(subtitles_path, subtitles_path, max_chars)

    print(colored("[+] Subtitles generated.", "green"))

    return subtitles_path
def generate_video(combined_video_path: str, tts_path: str, subtitles_path: str) -> str:
    """
    This function creates the final video, with subtitles and audio.

    The subtitles are drawn centered over the combined video, the
    text-to-speech audio is attached, and the result is written to
    ``../Frontend/public/videos``.

    Args:
        combined_video_path (str): The path to the combined video.
        tts_path (str): The path to the text-to-speech audio.
        subtitles_path (str): The path to the subtitles.

    Returns:
        str: The path to the final video, relative to the Frontend directory.
    """
    def make_text_clip(txt):
        # Called by SubtitlesClip to render the text of one subtitle
        return TextClip(txt, font="../fonts/bold_font.ttf", fontsize=100, color="#FFFF00",
                        stroke_color="black", stroke_width=5)

    # Create videos directory if it doesn't exist
    os.makedirs("../Frontend/public/videos", exist_ok=True)
    filename = f"/public/videos/{uuid.uuid4()}.mp4"

    video = VideoFileClip(combined_video_path)
    audio = None
    try:
        # Burn the subtitles into the video
        subtitles = SubtitlesClip(subtitles_path, make_text_clip)
        result = CompositeVideoClip([
            video,
            subtitles.set_pos(("center", "center"))
        ])

        # Add the audio
        audio = AudioFileClip(tts_path)
        result = result.set_audio(audio)

        result.write_videofile(f"../Frontend{filename}", threads=3)
    finally:
        # Release the source clips even when rendering fails, so a
        # long-running server does not accumulate open readers
        if audio is not None:
            audio.close()
        video.close()

    return filename
5 | 6 | 7 |Generate a video from just a subject
17 | 18 |Please wait for the video to be generated
39 |