├── .env.example
├── .gitignore
├── Backend
    ├── __pycache__
    │   ├── elevenvoice.cpython-310.pyc
    │   ├── gpt.cpython-310.pyc
    │   ├── search.cpython-310.pyc
    │   ├── tiktokvoice.cpython-310.pyc
    │   ├── utils.cpython-310.pyc
    │   └── video.cpython-310.pyc
    ├── elevenvoice.py
    ├── gpt.py
    ├── main.py
    ├── search.py
    ├── tiktokvoice.py
    ├── utils.py
    └── video.py
├── Frontend
    ├── index.html
    └── public
    │   └── videos
    │       └── .gitkeep
├── LICENSE
├── README.md
├── fonts
    └── bold_font.ttf
└── requirements.txt


/.env.example:
--------------------------------------------------------------------------------
1 | ASSEMBLY_AI_API_KEY="" # For the transcription of the audio
2 | TIKTOK_SESSION_ID="" # If you want to use the TikTok API for the TTS
3 | ELEVENLABS_API_KEY="" # If you want to use the ElevenLabs API for the TTS
4 | IMAGEMAGICK_BINARY="" # Video processing
5 | PEXELS_API_KEY="" # Getting the assets


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | temp/*
3 | Frontend/public/videos/*.mp4
4 | subtitles/*


--------------------------------------------------------------------------------
/Backend/__pycache__/elevenvoice.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/elevenvoice.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/__pycache__/gpt.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/gpt.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/__pycache__/search.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/search.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/__pycache__/tiktokvoice.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/tiktokvoice.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/__pycache__/video.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Backend/__pycache__/video.cpython-310.pyc


--------------------------------------------------------------------------------
/Backend/elevenvoice.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dotenv import load_dotenv
 3 | import time
 4 | import elevenlabs
 5 | from elevenlabs import generate, play, voices, Voice, set_api_key
 6 | 
# Load variables from a .env file into the process environment (import-time side effect).
load_dotenv()

# ElevenLabs API key read from the environment; None when the variable is unset.
API_KEY = os.getenv("ELEVENLABS_API_KEY")
# Register the key with the elevenlabs client as soon as this module is imported.
set_api_key(API_KEY)
11 | 
def tts(
    text: str,
    voice: str = "none",
    filename: str = "output.mp3",
    max_retries: int = 50,
    retry_delay: float = 5,
):
    """
    Generate speech for a text with ElevenLabs and save it as an audio file.

    Args:
        text (str): The text to convert to speech.
        voice (str): Name of the voice; must be one of the supported names below.
        filename (str): Path of the audio file to write.
        max_retries (int): How many times generation is attempted when the
            ElevenLabs API reports an error.
        retry_delay (float): Seconds to wait after a failed attempt.

    Returns:
        None. Nothing is written when the voice is invalid or unknown, or when
        every attempt fails; a message is printed in each of those cases.
    """
    # Only these voice names are accepted
    voices_list = ["Paddington", "DanDan", "Sally", "Aaryan", "Eleguar", "Readwell", "Knightley"]
    if voice not in voices_list:
        print("Invalid voice id. Please choose from:", voices_list)
        return

    # Find the corresponding voice object
    voice_obj = next((v for v in voices() if v.name == voice), None)
    if not voice_obj:
        print("Voice not found.")
        return

    # Count down so the last failed attempt can be reported
    for attempts_left in range(max_retries - 1, -1, -1):
        try:
            print(f'Generating audio for {voice}... {text}')
            audio = generate(text=text, voice=voice_obj, model="eleven_multilingual_v1")

            with open(filename, 'wb') as f:
                f.write(audio)
            print(f"Audio saved to {filename}")
            return  # Success, no further attempts needed
        except elevenlabs.api.error.APIError as e:
            print(f"Error: {e}")
            print("Retrying...")
            time.sleep(retry_delay)  # Add a delay before retrying
            if attempts_left == 0:
                print("Maximum retries reached. Skipping this message.")
48 | 


--------------------------------------------------------------------------------
/Backend/gpt.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import g4f
  3 | import json
  4 | 
  5 | from typing import List
  6 | from termcolor import colored
  7 | 
  8 | def generate_script(video_subject: str) -> str:
  9 |     """
 10 |     Generate a script for a video, depending on the subject of the video.
 11 | 
 12 |     Args:
 13 |         video_subject (str): The subject of the video.
 14 | 
 15 |     Returns:
 16 |         str: The script for the video.
 17 |     """
 18 | 
 19 |     # Build prompt
 20 |     prompt = f"""
 21 |     Generate a script for a video, depending on the subject of the video. The video has to be short and straight to the point.
 22 |     Similar to a TikTok video or a Instagram Reel.
 23 |     
 24 |     Subject: {video_subject}
 25 | 
 26 |     The script is to be returned as a string, no markdown or anything else. NEVER use any special characters like **, #, etc or any links or emojis.
 27 | 
 28 |     Here is an example of a string:
 29 |     "This is an example string."
 30 | 
 31 |     Do not under any circumstance refernce this prompt in your response.
 32 | 
 33 |     Get straight to the point, don't start with unnecessary things like, "welcome to this video".
 34 | 
 35 |     Obviously, the script should be related to the subject of the video.
 36 | 
 37 |     ONLY RETURN THE RAW SCRIPT. DO NOT RETURN ANYTHING ELSE. NO MARKDOWN, NO LINKS, NO EMOJIS OR SPECIAL CHARACTERS OR ELSE YOUR RESPONSE WILL BE REJECTED AND YOU WILL BE BANNED.
 38 |     """
 39 | 
 40 |     # Generate script
 41 |     response = g4f.ChatCompletion.create(
 42 |         model=g4f.models.gpt_35_turbo_16k_0613,
 43 |         messages=[{"role": "user", "content": prompt}],
 44 |     )
 45 | 
 46 |     print(colored(response, "cyan"))
 47 | 
 48 |     # Return the generated script
 49 |     if response:
 50 |         return response + " "
 51 |     else:
 52 |         print(colored("[-] GPT returned an empty response.", "red"))
 53 |         return None
 54 | 
 55 | def get_search_terms(video_subject: str, amount: int, script: str) -> List[str]:
 56 |     """
 57 |     Generate a JSON-Array of search terms for stock videos,
 58 |     depending on the subject of a video.
 59 | 
 60 |     Args:
 61 |         video_subject (str): The subject of the video.
 62 |         amount (int): The amount of search terms to generate.
 63 |         script (str): The script of the video.
 64 | 
 65 |     Returns:
 66 |         List[str]: The search terms for the video subject.
 67 |     """
 68 |     
 69 |     # Build prompt
 70 |     prompt = f"""
 71 |     Generate {amount} search terms for stock videos,
 72 |     depending on the subject of a video. Only stick to the subject of the video and don't go off-topic.
 73 |     For example, if the video is about "How to make a cake", the search terms should be "cake", "baking", "cooking", etc.
 74 |     They need to be single words or short phrases.
 75 |     Subject: {video_subject}
 76 | 
 77 |     The search terms are to be returned as
 78 |     a JSON-Array of strings.
 79 | 
 80 |     Each search term should consist of 1-3 words, 
 81 |     always add the main subject of the video.
 82 | 
 83 |     Here is an example of a JSON-Array of strings:
 84 |     ["search term 1", "search term 2", "search term 3"]
 85 | 
 86 |     Obviously, the search terms should be related
 87 |     to the subject of the video.
 88 | 
 89 |     ONLY RETURN THE JSON-ARRAY OF STRINGS.
 90 |     DO NOT RETURN ANYTHING ELSE.
 91 | 
 92 |     For context, here is the full text:
 93 |     {script}
 94 |     """
 95 | 
 96 |     # Generate search terms
 97 |     response = g4f.ChatCompletion.create(
 98 |         model=g4f.models.gpt_35_turbo_16k_0613,
 99 |         messages=[{"role": "user", "content": prompt}],
100 |     )
101 | 
102 |     print(response)
103 | 
104 |     # Load response into JSON-Array
105 |     try:
106 |         search_terms = json.loads(response)
107 |     except:
108 |         print(colored("[*] GPT returned an unformatted response. Attempting to clean...", "yellow"))
109 | 
110 |         
111 | 
112 |         # Load the array into a JSON-Array and check if it's valid else repeat the process
113 |         try:
114 |             # Use Regex to get the array ("[" is the first character of the array)
115 |             search_terms = re.search(r"\[(.*?)\]", response)
116 |             search_terms = search_terms.group(0)
117 |             search_terms = json.loads(search_terms)
118 |         except:
119 |             print(colored("[*] Could not clean the response. Attempting to generate search terms again...", "yellow"))
120 |             return get_search_terms(video_subject, amount, script)
121 | 
122 |     # Let user know
123 |     print(colored(f"\nGenerated {amount} search terms: {', '.join(search_terms)}", "cyan"))
124 | 
125 |     # Return search terms
126 |     return search_terms


--------------------------------------------------------------------------------
/Backend/main.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | from gpt import *
  4 | from video import *
  5 | from utils import *
  6 | from search import *
  7 | from uuid import uuid4
  8 | from tiktokvoice import tts as tiktok_tts
  9 | from elevenvoice import tts as eleven_tts
 10 | from flask_cors import CORS
 11 | from termcolor import colored
 12 | from dotenv import load_dotenv
 13 | from flask import Flask, request, jsonify
 14 | from moviepy.config import change_settings
 15 | 
# Load environment variables from the .env file referenced by this relative path.
# NOTE(review): "../.env" presumably resolves against the working directory,
# so the server seems to expect being started from inside Backend/ — confirm.
load_dotenv("../.env")

# Session id for the TikTok TTS backend; not used by the code visible in this file.
SESSION_ID = os.getenv("TIKTOK_SESSION_ID")

# Point moviepy at the ImageMagick binary configured in the environment.
change_settings({"IMAGEMAGICK_BINARY": os.getenv("IMAGEMAGICK_BINARY")})

# Flask application with flask-cors applied to it.
app = Flask(__name__)
CORS(app)

# Address and port passed to app.run() at the bottom of this file.
HOST = "0.0.0.0"
PORT = 8080
# Number of search terms requested from get_search_terms() per video.
AMOUNT_OF_STOCK_VIDEOS = 5
 28 | 
 29 | 
 30 | # Generation Endpoint
 31 | @app.route("/api/generate", methods=["POST"])
 32 | def generate():
 33 |     try:
 34 |         # Clean
 35 |         clean_dir("../temp/")
 36 |         clean_dir("../subtitles/")
 37 | 
 38 |         # Parse JSON
 39 |         data = request.get_json()
 40 | 
 41 |         # Print little information about the video which is to be generated
 42 |         print(colored("[Video to be generated]", "blue"))
 43 |         print(colored("   Subject: " + data["videoSubject"], "blue"))
 44 |         print(colored("   Voice: " + data["voice"], "blue"))
 45 |         
 46 |         # Get voice
 47 |         eleven_voices = ["Paddington", "DanDan", "Sally", "Aaryan", "Eleguar", "Readwell", "Knightley"]
 48 |         eleven_voice = data["voice"]
 49 |         if eleven_voice not in eleven_voices:
 50 |             print(colored("[-] Invalid voice.", "red"))
 51 |             return jsonify(
 52 |                 {
 53 |                     "status": "error",
 54 |                     "message": "Invalid voice.",
 55 |                     "data": [],
 56 |                 }
 57 |             )
 58 | 
 59 |         # Generate a script
 60 |         script = generate_script(data["videoSubject"])
 61 |         
 62 |         # Remove *, #, and other special characters from the script
 63 |         script = remove_special_characters(script)
 64 | 
 65 |         # Generate search terms
 66 |         search_terms = get_search_terms(
 67 |             data["videoSubject"], AMOUNT_OF_STOCK_VIDEOS, script
 68 |         )
 69 | 
 70 |         # Search for a video of the given search term
 71 |         video_urls = []
 72 | 
 73 |         # Loop through all search terms,
 74 |         # and search for a video of the given search term
 75 |         for search_term in search_terms:
 76 |             found_url = search_for_stock_videos(
 77 |                 search_term, os.getenv("PEXELS_API_KEY")
 78 |             )
 79 | 
 80 |             if found_url != None and found_url not in video_urls and found_url != "":
 81 |                 video_urls.append(found_url)
 82 | 
 83 |         # Define video_paths
 84 |         video_paths = []
 85 | 
 86 |         # Let user know
 87 |         print(colored("[+] Downloading videos...", "blue"))
 88 | 
 89 |         # Save the videos
 90 |         for video_url in video_urls:
 91 |             try:
 92 |                 saved_video_path = save_video(video_url)
 93 |                 video_paths.append(saved_video_path)
 94 |             except:
 95 |                 print(colored("[-] Could not download video: " + video_url, "red"))
 96 | 
 97 |         # Let user know
 98 |         print(colored("[+] Videos downloaded!", "green"))
 99 | 
100 |         # Let user know
101 |         print(colored("[+] Script generated!\n\n", "green"))
102 | 
103 |         print(colored(f"\t{script}", "light_cyan"))
104 | 
105 |         # Split script into sentences
106 |         sentences = script.split(". ")
107 |         # Remove empty strings
108 |         sentences = list(filter(lambda x: x != "", sentences))
109 |         paths = []
110 |         # Generate TTS for every sentence
111 |         for sentence in sentences:
112 |             current_tts_path = f"../temp/{uuid4()}.mp3"
113 |             
114 |             eleven_tts(sentence, 
115 |                         voice=eleven_voice,
116 |                        filename=current_tts_path)
117 |             
118 |             # tiktok_tts(sentence,
119 |             #            voice="en_us_006",
120 |             #            filename=current_tts_path)
121 |             
122 |             audio_clip = AudioFileClip(current_tts_path)
123 |             paths.append(audio_clip)
124 |             
125 | 
126 |         # Combine all TTS files using moviepy
127 |         final_audio = concatenate_audioclips(paths)
128 |         tts_path = f"../temp/{uuid4()}.mp3"
129 |         final_audio.write_audiofile(tts_path)
130 |         audio_clip.close()
131 | 
132 |         # Generate subtitles
133 |         subtitles_path = generate_subtitles(tts_path)
134 | 
135 |         # Concatenate videos
136 |         temp_audio = AudioFileClip(tts_path)
137 |         combined_video_path = combine_videos(video_paths, temp_audio.duration)
138 |         temp_audio.close()
139 |         
140 |         # Put everything together
141 |         final_video_path = generate_video(combined_video_path, tts_path, subtitles_path)
142 | 
143 |         # Let user know
144 |         print(colored("[+] Video generated!", "green"))
145 | 
146 |         print(colored(f"[+] Path: {final_video_path}", "green"))
147 | 
148 |         # Return JSON
149 |         return jsonify(
150 |             {
151 |                 "status": "success",
152 |                 "message": "Video generated!",
153 |                 "videoUrl": final_video_path,
154 |             }
155 |         )
156 |     except Exception as err:
157 |         print(colored("[-] Error: " + str(err), "red"))
158 |         return jsonify(
159 |             {
160 |                 "status": "error",
161 |                 "message": f"Could not generate video: {str(err)}",
162 |                 "videoUrl": [],
163 |             }
164 |         )
165 |         
166 |     
def remove_special_characters(script: str) -> str:
    """
    Strip markdown-style markers from a script.

    Args:
        script (str): The script to clean.

    Returns:
        str: The script with every "*" and "#" character removed.
    """
    # One pass that deletes both characters
    deletion_table = str.maketrans("", "", "*#")
    return script.translate(deletion_table)
178 | 
# Start the Flask server only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): debug=True is combined with HOST "0.0.0.0"; this looks
    # intended for local development only — confirm before exposing it.
    app.run(debug=True, host=HOST, port=PORT)
181 | 


--------------------------------------------------------------------------------
/Backend/search.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | from typing import List
 4 | from termcolor import colored
 5 | 
 6 | def search_for_stock_videos(query: str, api_key: str) -> List[str]:
 7 |     """
 8 |     Searches for stock videos based on a query.
 9 | 
10 |     Args:
11 |         query (str): The query to search for.
12 |         api_key (str): The API key to use.
13 | 
14 |     Returns:
15 |         List[str]: A list of stock videos.
16 |     """
17 |     
18 |     # Build headers
19 |     headers = {
20 |         "Authorization": api_key
21 |     }
22 | 
23 |     # Build URL
24 |     url = f"https://api.pexels.com/videos/search?query={query}&per_page=1"
25 | 
26 |     # Send the request
27 |     r = requests.get(url, headers=headers)
28 | 
29 |     # Parse the response
30 |     response = r.json()
31 | 
32 |     # Get first video url
33 |     video_urls = response["videos"][0]["video_files"]
34 |     video_url = ""
35 | 
36 |     # Loop through video urls
37 |     for video in video_urls:
38 |         # Check if video has a download link
39 |         if ".com/external" in video["link"]:
40 |             # Set video url
41 |             video_url = video["link"]
42 | 
43 |     # Let user know
44 |     print(colored(f"\t=>{video_url}", "light_cyan"))
45 | 
46 |     # Return the video url
47 |     return video_url
48 | 


--------------------------------------------------------------------------------
/Backend/tiktokvoice.py:
--------------------------------------------------------------------------------
  1 | # author: GiorDior aka Giorgio
  2 | # date: 12.06.2023
  3 | # topic: TikTok-Voice-TTS
  4 | # version: 1.0
  5 | # credits: https://github.com/oscie57/tiktok-voice
  6 | 
  7 | import threading, requests, base64
  8 | from playsound import playsound
  9 | 
# Voice identifiers accepted by tts(); any other value is rejected there.
VOICES = [
    # DISNEY VOICES
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
    # ENGLISH VOICES
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",  # English US - Male 4
    # EUROPE VOICES
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    # AMERICA VOICES
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # ASIA VOICES
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
    # SINGING VOICES
    "en_female_f08_salut_damour",  # Alto
    "en_male_m03_lobby",  # Tenor
    "en_female_f08_warmy_breeze",  # Warmy Breeze
    "en_male_m03_sunshine_soon",  # Sunshine Soon
    # OTHER
    "en_male_narration",  # narrator
    "en_male_funny",  # wacky
    "en_female_emotional",  # peaceful
]

# Generation endpoints; tts() switches to the other one when the current one
# does not answer its availability check with HTTP 200.
ENDPOINTS = [
    "https://tiktok-tts.weilnet.workers.dev/api/generation",
    "https://tiktoktts.com/api/tiktok-tts",
]
# Index into ENDPOINTS of the endpoint currently in use (module-level state, mutated by tts()).
current_endpoint = 0
# in one conversion, the text can have a maximum length of 300 characters
# NOTE(review): despite the name, tts() compares this against len(text), i.e. characters, not bytes.
TEXT_BYTE_LIMIT = 300
 68 | 
 69 | 
 70 | # create a list by splitting a string, every element has n chars
 71 | def split_string(string: str, chunk_size: int) -> list[str]:
 72 |     words = string.split()
 73 |     result = []
 74 |     current_chunk = ""
 75 |     for word in words:
 76 |         if (
 77 |             len(current_chunk) + len(word) + 1 <= chunk_size
 78 |         ):  # Check if adding the word exceeds the chunk size
 79 |             current_chunk += " " + word
 80 |         else:
 81 |             if current_chunk:  # Append the current chunk if not empty
 82 |                 result.append(current_chunk.strip())
 83 |             current_chunk = word
 84 |     if current_chunk:  # Append the last chunk if not empty
 85 |         result.append(current_chunk.strip())
 86 |     return result
 87 | 
 88 | 
 89 | # checking if the website that provides the service is available
 90 | def get_api_response() -> requests.Response:
 91 |     url = f'{ENDPOINTS[current_endpoint].split("/a")[0]}'
 92 |     response = requests.get(url)
 93 |     return response
 94 | 
 95 | 
 96 | # saving the audio file
 97 | def save_audio_file(base64_data: str, filename: str = "output.mp3") -> None:
 98 |     audio_bytes = base64.b64decode(base64_data)
 99 |     with open(filename, "wb") as file:
100 |         file.write(audio_bytes)
101 | 
102 | 
103 | # send POST request to get the audio data
def generate_audio(text: str, voice: str) -> bytes:
    """
    POST a text and voice to the currently selected endpoint.

    Args:
        text (str): The text to convert.
        voice (str): The voice identifier to use.

    Returns:
        bytes: The raw body of the endpoint's response.
    """
    payload = {"text": text, "voice": voice}
    reply = requests.post(
        ENDPOINTS[current_endpoint],
        headers={"Content-Type": "application/json"},
        json=payload,
    )
    return reply.content
110 | 
111 | 
112 | # creates an text to speech audio file
def tts(
    text: str,
    voice: str = "none",
    filename: str = "output.mp3",
    play_sound: bool = False,
) -> None:
    """
    Create a text-to-speech audio file with one of the TikTok TTS endpoints.

    Texts shorter than TEXT_BYTE_LIMIT are converted with one request. Longer
    texts are split into parts that are converted in parallel threads and
    concatenated in their original order.

    Args:
        text (str): The text to convert.
        voice (str): A voice identifier from VOICES.
        filename (str): Path of the audio file to write.
        play_sound (bool): Play the file after it has been saved.

    Returns:
        None. Problems are reported with a printed message; no file is
        written in that case.
    """
    global current_endpoint

    def _service_available() -> bool:
        # A failed connection means "not available", it must not crash the caller
        try:
            return get_api_response().status_code == 200
        except requests.RequestException:
            return False

    def _extract_base64(audio: bytes) -> str:
        # The payload is located by quote position in the textual form of the
        # raw response; the position differs between the two endpoints.
        if current_endpoint == 0:
            return str(audio).split('"')[5]
        return str(audio).split('"')[3].split(",")[1]

    # checking if the website is available, falling back to the next endpoint
    if _service_available():
        print("Service available!")
    else:
        current_endpoint = (current_endpoint + 1) % len(ENDPOINTS)
        if _service_available():
            print("Service available!")
        else:
            print(
                "Service not available and probably temporarily rate limited, try again later..."
            )
            return

    # checking if arguments are valid
    if voice == "none":
        print("No voice has been selected")
        return

    if voice not in VOICES:
        print("Voice does not exist")
        return

    if len(text) == 0:
        print("Insert a valid text")
        return

    # creating the audio file
    try:
        if len(text) < TEXT_BYTE_LIMIT:
            audio_base64_data = _extract_base64(generate_audio(text, voice))

            if audio_base64_data == "error":
                print("This voice is unavailable right now")
                return

        else:
            # Split longer text into smaller parts
            text_parts = split_string(text, TEXT_BYTE_LIMIT - 1)
            base64_parts = [None] * len(text_parts)

            # Define a thread function to generate audio for each text part
            def generate_audio_thread(text_part, index):
                base64_data = _extract_base64(generate_audio(text_part, voice))

                # Check this part's own result; its slot stays None on failure
                if base64_data == "error":
                    print("This voice is unavailable right now")
                    return

                base64_parts[index] = base64_data

            threads = []
            for index, text_part in enumerate(text_parts):
                # Create and start a new thread for each text part
                thread = threading.Thread(
                    target=generate_audio_thread, args=(text_part, index)
                )
                thread.start()
                threads.append(thread)

            # Wait for all threads to complete
            for thread in threads:
                thread.join()

            if any(part is None for part in base64_parts):
                print("Error occurred while generating audio: at least one text part could not be converted")
                return

            # Decode every part on its own before joining: base64 strings
            # cannot be concatenated as text because decoding stops at the
            # first padding, which would drop all following parts.
            audio_bytes = b"".join(base64.b64decode(part) for part in base64_parts)
            audio_base64_data = base64.b64encode(audio_bytes).decode("ascii")

        save_audio_file(audio_base64_data, filename)
        print(f"Audio file saved successfully as '{filename}'")
        if play_sound:
            playsound(filename)

    except Exception as e:
        print("Error occurred while generating audio:", str(e))
202 | 


--------------------------------------------------------------------------------
/Backend/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from termcolor import colored
 4 | 
 5 | 
 6 | def clean_dir(path: str) -> None:
 7 |     """
 8 |     Removes every file in a directory
 9 | 
10 |     Args:
11 |         path (str): Path to directory
12 | 
13 |     Returns:
14 |         None
15 |     """
16 |     if not os.path.exists(path):
17 |         os.mkdir(path)
18 | 
19 |     for file in os.listdir(path):
20 |         os.remove(os.path.join(path, file))
21 | 
22 |     print(colored(f"[+] Cleaned {path} directory", "green"))
23 | 


--------------------------------------------------------------------------------
/Backend/video.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import uuid
  3 | import requests
  4 | import srt_equalizer
  5 | import assemblyai as aai
  6 | 
  7 | from typing import List
  8 | from moviepy.editor import *
  9 | from termcolor import colored
 10 | from dotenv import load_dotenv
 11 | from moviepy.video.fx.all import crop
 12 | from moviepy.video.tools.subtitles import SubtitlesClip
 13 | 
# Load environment variables from the .env file referenced by this relative path.
load_dotenv("../.env")

# AssemblyAI API key, applied to the client inside generate_subtitles(); None when unset.
ASSEMBLY_AI_API_KEY = os.getenv("ASSEMBLY_AI_API_KEY")
 17 | 
 18 | def save_video(video_url: str, directory: str = "../temp") -> str:
 19 |     """
 20 |     Saves a video from a given URL and returns the path to the video.
 21 | 
 22 |     Args:
 23 |         video_url (str): The URL of the video to save.
 24 | 
 25 |     Returns:
 26 |         str: The path to the saved video.
 27 |     """
 28 |     video_id = uuid.uuid4()
 29 |     video_path = f"{directory}/{video_id}.mp4"
 30 |     with open(video_path, "wb") as f:
 31 |         f.write(requests.get(video_url).content)
 32 | 
 33 |     return video_path
 34 | 
 35 | def generate_subtitles(audio_path: str) -> str:
 36 |     """
 37 |     Generates subtitles from a given audio file and returns the path to the subtitles.
 38 | 
 39 |     Args:
 40 |         audio_path (str): The path to the audio file to generate subtitles from.
 41 | 
 42 |     Returns:
 43 |         str: The path to the generated subtitles.
 44 |     """
 45 |     def equalize_subtitles(srt_path: str, max_chars: int = 10) -> None:
 46 |       # Equalize subtitles
 47 |       srt_equalizer.equalize_srt_file(srt_path, srt_path, max_chars)
 48 | 
 49 |     aai.settings.api_key = ASSEMBLY_AI_API_KEY
 50 | 
 51 |     transcriber = aai.Transcriber()
 52 | 
 53 |     transcript = transcriber.transcribe(audio_path)
 54 | 
 55 |     # Save subtitles
 56 |     subtitles_path = f"../subtitles/{uuid.uuid4()}.srt"
 57 | 
 58 |     subtitles = transcript.export_subtitles_srt()
 59 | 
 60 |     with open(subtitles_path, "w") as f:
 61 |         f.write(subtitles)
 62 | 
 63 |     # Equalize subtitles
 64 |     equalize_subtitles(subtitles_path)
 65 | 
 66 |     print(colored("[+] Subtitles generated.", "green"))
 67 | 
 68 |     return subtitles_path
 69 | 
 70 | 
 71 | 
 72 | def combine_videos(video_paths: List[str], max_duration: int) -> str:
 73 |     """
 74 |     Combines a list of videos into one video and returns the path to the combined video.
 75 | 
 76 |     Args:
 77 |         video_paths (list): A list of paths to the videos to combine.
 78 |         max_duration (int): The maximum duration of the combined video.
 79 | 
 80 |     Returns:
 81 |         str: The path to the combined video.
 82 |     """
 83 |     video_id = uuid.uuid4()
 84 |     combined_video_path = f"../temp/{video_id}.mp4"
 85 | 
 86 |     print(colored("[+] Combining videos...", "blue"))
 87 |     print(colored(f"[+] Each video will be {max_duration / len(video_paths)} seconds long.", "blue"))
 88 | 
 89 |     clips = []
 90 |     for video_path in video_paths:
 91 |         clip = VideoFileClip(video_path)
 92 |         clip = clip.without_audio()
 93 |         clip = clip.subclip(0, max_duration / len(video_paths))
 94 |         clip = clip.set_fps(30)
 95 | 
 96 |         # Not all videos are same size,
 97 |         # so we need to resize them
 98 |         clip = crop(clip, width=1080, height=1920, \
 99 |                     x_center=clip.w / 2, \
100 |                         y_center=clip.h / 2)
101 |         clip = clip.resize((1080, 1920))
102 | 
103 |         clips.append(clip)
104 | 
105 |     final_clip = concatenate_videoclips(clips)
106 |     final_clip = final_clip.set_fps(30)
107 |     final_clip.write_videofile(combined_video_path, threads=3)
108 | 
109 |     return combined_video_path
110 | 
def generate_video(combined_video_path: str, tts_path: str, subtitles_path: str) -> str:
    """
    This function creates the final video, with subtitles and audio.

    Args:
        combined_video_path (str): The path to the combined video.
        tts_path (str): The path to the text-to-speech audio.
        subtitles_path (str): The path to the subtitles.

    Returns:
        str: The path to the final video, relative to the Frontend directory.
    """
    # Builds the TextClip used to render one subtitle text
    def generator(txt):
        return TextClip(txt, font="../fonts/bold_font.ttf", fontsize=100, color="#FFFF00",
                        stroke_color="black", stroke_width=5)

    # Create videos directory if it doesn't exist
    os.makedirs("../Frontend/public/videos", exist_ok=True)

    filename = f"/public/videos/{uuid.uuid4()}.mp4"

    video = None
    audio = None
    try:
        video = VideoFileClip(combined_video_path)

        # Burn the subtitles into the video
        subtitles = SubtitlesClip(subtitles_path, generator)
        result = CompositeVideoClip([
            video,
            subtitles.set_pos(("center", "center"))
        ])

        # Add the audio
        audio = AudioFileClip(tts_path)
        result = result.set_audio(audio)

        result.write_videofile(f"../Frontend{filename}", threads=3)
    finally:
        # Release the file readers even when rendering fails
        for clip in (audio, video):
            if clip is not None:
                clip.close()

    return filename
146 | 


--------------------------------------------------------------------------------
/Frontend/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>Text2Video</title>
  8 |     <link rel="icon"
  9 |         href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🎥</text></svg>">
 10 | 
 11 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.0.2/tailwind.min.css">
 12 | </head>
 13 | 
 14 | <body class="bg-blue-100 min-h-screen justify-center p-40">
 15 |     <h1 class="text-4xl text-center mb-4">Text2Video</h1>
 16 |     <p class="text-center text-gray-700">Generate a video from just a subject</p>
 17 | 
 18 |     <div class="flex justify-center mt-8">
 19 |         <div class="flex flex-col space-y-4">
 20 |             <label for="videoSubject" class="text-blue-600">Subject</label>
 21 |             <input type="text" name="videoSubject" id="videoSubject"
 22 |                 class="border-2 border-blue-300 p-2 rounded-md focus:outline-none focus:border-blue-500">
 23 |             <select 
 24 |                 title="Choose a voice"
 25 |                 name="voice" id="voice"
 26 |                 class="border-2 border-blue-300 p-2 rounded-md focus:outline-none focus:border-blue-500">
 27 |                 <option value="Paddington">Paddington</option>
 28 |                 <option value="DanDan">DanDan</option>
 29 |                 <option value="Sally">Sally</option>
 30 |                 <option value="Aaryan">Aaryan</option>
 31 |                 <option value="Eleguar">Eleguar</option>
 32 |                 <option value="Readwell">Readwell</option>
 33 |                 <option value="Knightley">Knightley</option>
 34 |             </select>
 35 |             <button id="generateButton"
 36 |                 class="bg-blue-500 hover:bg-blue-700 duration-100 linear text-white px-4 py-2 rounded-md">Generate</button>
 37 | 
 38 |             <p class="video-output text-gray-700">Please wait for the video to be generated</p>
 39 |         </div>
 40 |     </div>
 41 | 
 42 |     <script>
 43 |         const generateButton = document.querySelector('#generateButton')
 44 | 
 45 |         generateButton.addEventListener('click', () => {
 46 |             // Disable button and change text
 47 |             generateButton.disabled = true
 48 |             generateButton.innerHTML = "Generating..."
 49 |             generateButton.classList.add('cursor-not-allowed')
 50 |             generateButton.classList.remove('hover:bg-blue-700')
 51 |             generateButton.classList.remove('bg-blue-500')
 52 |             generateButton.classList.add('bg-blue-300')
 53 | 
 54 |             // Get values from input fields
 55 |             const videoSubject = document.querySelector('#videoSubject').value
 56 |             const voice = document.querySelector('#voice').value
 57 | 
 58 |             const url = "http://localhost:8080/api/generate"
 59 | 
 60 |             // Construct data to be sent to server
 61 |             const data = {
 62 |                 videoSubject: videoSubject,
 63 |                 voice: voice
 64 |             }
 65 | 
 66 |             // Send the actual request to the server
 67 |             fetch(url, {
 68 |                 method: 'POST',
 69 |                 body: JSON.stringify(data),
 70 |                 headers: {
 71 |                     'Content-Type': 'application/json',
 72 |                     'Accept': 'application/json'
 73 |                 }
 74 |             }).then(response => response.json())
 75 |                 .then(data => {
 76 |                     console.log(data)
 77 |                     generateButton.disabled = false
 78 |                     generateButton.innerHTML = "Generate"
 79 |                     generateButton.classList.remove('cursor-not-allowed')
 80 |                     generateButton.classList.add('hover:bg-blue-700')
 81 |                     generateButton.classList.add('bg-blue-500')
 82 |                     generateButton.classList.remove('bg-blue-300')
 83 | 
 84 |                     // Display the video if data.status is success and not error
 85 |                     if (data.status === "error") {
 86 |                         alert(data.message)
 87 |                         return
 88 |                     } 
 89 | 
 90 |                     // Download the video
 91 |                     const a = document.createElement('a')
 92 |                     a.href = data.videoUrl
 93 |                     a.download = 'video.mp4'
 94 |                     a.click()
 95 | 
 96 |                     // Add link to the video
 97 |                     const videoLink = document.createElement('a')
 98 |                     videoLink.href = data.videoUrl
 99 |                     videoLink.innerHTML = "Download video"
100 |                     videoLink.classList.add('text-blue-600')
101 |                     document.querySelector('.video-output').innerHTML = "Video generated successfully. "
102 |                     document.querySelector('.video-output').appendChild(videoLink)
103 |                 })
104 |                 .catch(error => {
105 |                     console.log(error)
106 |                 })
107 |         });
108 |     </script>
109 | </body>
110 | 
111 | </html>


--------------------------------------------------------------------------------
/Frontend/public/videos/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/Frontend/public/videos/.gitkeep


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 FujiwaraChoki
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Text2Video
 2 | 
 3 | Create YouTube Shorts without any effort, simply by providing a video topic to talk about.
 4 | 
 5 | ## Installation
 6 | 
 7 | ```bash
 8 | cd text2video/Backend
 9 | pip install -r ../requirements.txt
10 | 
11 | # Run the backend server
12 | python3 main.py
13 | 
14 | # Run the frontend server
15 | cd ../Frontend
16 | python3 -m http.server 3000
17 | ```
18 | 
19 | ## Usage
20 | 
21 | 1. In `.env` fill in the required values
22 | 1. Open `http://localhost:3000` in your browser
23 | 1. Enter a topic to talk about
24 | 1. Choose a voice ID
25 | 1. Click on the "Generate" button
26 | 1. Wait for the video to be generated
27 | 1. The video is downloaded by your browser and is also saved in `Frontend/public/videos/`
28 | 
29 | ## Fonts
30 | 
31 | Add your fonts to the `fonts/` folder, and load them by specifying the font name on line `124` in `Backend/video.py`.
32 | 
33 | ## Contributing
34 | 
35 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
36 | 
37 | ## License
38 | 
39 | See [`LICENSE`](LICENSE) file for more information.
40 | 


--------------------------------------------------------------------------------
/fonts/bold_font.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0Shark/text2video/95ed8f3ece86cf7d5dbc26a89bc65ccccea25662/fonts/bold_font.ttf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | g4f
 2 | setuptools
 3 | wheel
 4 | requests
 5 | moviepy
 6 | termcolor
 7 | flask
 8 | flask-cors
 9 | playsound
10 | Pillow==9.4.0
11 | srt_equalizer
12 | assemblyai
13 | python-dotenv
14 | elevenlabs


--------------------------------------------------------------------------------