├── .python-version
├── resources
│   ├── font
│   │   └── font.ttf
│   └── Intro
│       └── intro.jpg
├── diffusion
│   └── scripts
│       ├── Modelfile
│       ├── generate_image_local.py
│       ├── generate_image.py
│       └── generate_script.py
├── assembly
│   ├── templates
│   │   └── video_template.json
│   └── scripts
│       └── assembly_video.py
├── pyproject.toml
├── tts
│   └── scripts
│       └── generate_audio.py
├── main.py
├── main_local.py
└── README.md
/.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /resources/font/font.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLSAKIIT/ForgeTube/HEAD/resources/font/font.ttf -------------------------------------------------------------------------------- /resources/Intro/intro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLSAKIIT/ForgeTube/HEAD/resources/Intro/intro.jpg -------------------------------------------------------------------------------- /diffusion/scripts/Modelfile: -------------------------------------------------------------------------------- 1 | FROM llama3.1 2 | 3 | 4 | PARAMETER temperature 1 5 | 6 | SYSTEM """You are a YouTube script writer and content creator. Your task is to create YouTube scripts and to segment them with various parameters in JSON format.""" -------------------------------------------------------------------------------- /assembly/templates/video_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "default_video_template", 3 | "description": "Simple structure for video editing workflow", 4 | "video_settings": { 5 | "resolution": "1920x1080", 6 | "frame_rate": 30 7 | }, 8 | "audio_settings": { 9 | "sample_rate": 44100, 10 | "channels": 2 11 | }, 12 | "transitions": [] 13 | } 14 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ForgeTube" 3 | version = "0.1.0" 4 | description = "Automated AI video generation: script, image, and narration generation with final video assembly" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "accelerate>=1.4.0", 9 | "diffusers>=0.32.2", 10 | "google-generativeai>=0.8.4", 11 | "google-search-results>=2.4.2", 12 | "kokoro>=0.7.16", 13 | "modal>=0.73.59", 14 | "moviepy>=2.1.2", 15 | "pydub>=0.25.1", 16 | "pysrt>=1.1.2", 17 | "soundfile>=0.13.1", 18 | "spacy>=3.8.4", 19 | ] 20 | -------------------------------------------------------------------------------- /tts/scripts/generate_audio.py: -------------------------------------------------------------------------------- 1 | 2 | from pydub import AudioSegment 3 | import json 4 | import io 5 | import soundfile as sf 6 | import os 7 | from kokoro.pipeline import KPipeline 8 | 9 | def generate_audio(script_data): 10 | pipeline = KPipeline(lang_code="b") 11 | 12 | all_audio = [] 13 | for segment in script_data["audio_script"]: 14 | speaker_id = "am_adam" if segment["speaker"] in ["default", "narrator_male"] else "af_heart" 15 | audio = pipeline(text=segment["text"], voice=speaker_id, speed=segment["speed"]) 16 | 17 | # Collect audio chunks 18 | buffer = io.BytesIO() 19 | for _, _, chunk in audio: 20 | sf.write(buffer, chunk, 24000, format='WAV') 21 | buffer.seek(0) 22 | all_audio.append(buffer.read()) 23 | 24 | return all_audio 25 |
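# A minimal illustration of the script_data shape generate_audio expects; the
# values below are hypothetical, not taken from a real generated script:
#
#   {"audio_script": [{"speaker": "narrator_male",
#                      "text": "Hello and welcome.",
#                      "speed": 1.0}]}
#
# Any speaker other than "default" or "narrator_male" falls back to the
# "af_heart" voice.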
26 | def merge_audio(audio_path,audio_bytes_list): 27 | # Create output directory 28 | # os.makedirs("output_audio", exist_ok=True) 29 | 30 | # Save segments locally 31 | audio_files = [] 32 | for idx, audio_bytes in enumerate(audio_bytes_list): 33 | output_path = f"{audio_path}/segment_{idx}.wav" 34 | with open(output_path, "wb") as f: 35 | f.write(audio_bytes) 36 | audio_files.append(output_path) 37 | print(f"Audio file {idx} successfully saved at: {output_path}") 38 | 39 | # Merge audio files (not really needed) 40 | # master_audio = AudioSegment.empty() 41 | # for file in audio_files: 42 | # master_audio += AudioSegment.from_wav(file) 43 | 44 | # Export final file 45 | # master_output_path = f"{audio_path}/master_output.wav" 46 | # master_audio.export(master_output_path, format="wav") 47 | # return master_output_path 48 | 49 | def main_generate_audio(script_path,audio_path): 50 | # Load script data 51 | with open(script_path) as f: 52 | script_data = json.load(f) 53 | 54 | # Generate audio 55 | audio_bytes_list = generate_audio(script_data) 56 | 57 | # Save the audio segments (merge_audio writes one .wav per segment) 58 | merge_audio(audio_path,audio_bytes_list) 59 | 60 | print(f"Audio generation complete! Saved in {audio_path}") 61 | 62 | # if __name__ == "__main__": 63 | # main_generate_audio(script_path="resources/scripts/script.json",audio_path="resources/audio") -------------------------------------------------------------------------------- /diffusion/scripts/generate_image_local.py: -------------------------------------------------------------------------------- 1 | # import modal 2 | import json 3 | import os 4 | import time 5 | from io import BytesIO 6 | 7 | 8 | def generate_image(prompt, negative_prompt="", steps=50, guidance_scale=9, width=1920, height=1080, seed=None): 9 | import torch 10 | from diffusers import DiffusionPipeline 11 | 12 | 13 | # LOADS THE DIFFUSION PIPELINE 14 | 15 | pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", 16 | torch_dtype=torch.float16, 17 | use_safetensors=True, 18 | variant="fp16") 19 | 20 | pipe.to("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed) if seed else None 23 | 24 | image = pipe( 25 | prompt, 26 | negative_prompt=negative_prompt, 27 | num_inference_steps=steps, 28 | guidance_scale=guidance_scale, 29 | width=width, 30 | height=height, 31 | generator=generator 32 | ).images[0] 33 | 34 | img_byte_arr = BytesIO() 35 | image.save(img_byte_arr, format="PNG") 36 | img_byte_arr.seek(0) 37 | 38 | return img_byte_arr.getvalue() 39 | 40 | # PROVIDE SOURCE TEXT OR PROMPT IN JSON FILE 41 | 42 | def main_generate_image(script_path,images_output_path): 43 | # JSON Decoding Error Handling 44 | with open(script_path, "r", encoding="utf-8") as file: 45 | try: 46 | data = json.load(file) 47 | except json.JSONDecodeError: 48 | print("Error reading JSON file.") 49 | return 50 | # JSON Key Error Handling 51 | if "visual_script" not in data: 52 | print("Missing key in JSON.") 53 | return 54 | 55 | 56 | # GENERATING THE IMAGES 57 | 58 | # Looping Through the Scenes 59 | for idx, scene in enumerate(data["visual_script"]): 60 | try: 61 | prompt = scene["prompt"] 62 | timestamp = scene.get("timestamp", f"{idx:03d}") 63 | negative_prompt = scene.get("negative_prompt", "") 64 | steps = scene.get("steps", 50) 65 | # guidance_scale = scene.get("guidance_scale", 12) 66 | guidance_scale = 9 # Set to 9 to allow for some room of filling missing
elements. 67 | width = 1920 68 | height = 1080 69 | seed = scene.get("seed", None) 70 | 71 | scene_id = timestamp.replace(":", "-") 72 | 73 | image_data = generate_image(prompt, negative_prompt, steps, guidance_scale, width, height, seed) 74 | 75 | 76 | # SAVING THE IMAGES IN THE OUTPUT DIRECTORY 77 | 78 | file_path = os.path.join(images_output_path, f"scene_{scene_id}.png") 79 | with open(file_path, "wb") as f: 80 | f.write(image_data) 81 | 82 | print(f"Saved: {file_path}") 83 | 84 | time.sleep(2) 85 | 86 | except Exception as e: 87 | print(f"Error processing scene {idx}: {e}") 88 | 89 | print("Image Generation is Done.") 90 | 91 | # if __name__ == "__main__": 92 | # main_generate_image(script_path=script_path,images_output_path=images_output_path) 93 | -------------------------------------------------------------------------------- /diffusion/scripts/generate_image.py: -------------------------------------------------------------------------------- 1 | import modal 2 | import json 3 | import os 4 | import time 5 | from io import BytesIO 6 | 7 | image = modal.Image.debian_slim().pip_install( 8 | "diffusers", 9 | "torch", 10 | "transformers", 11 | "accelerate" 12 | ) 13 | 14 | app = modal.App(name="ForgeTube_app") 15 | 16 | @app.function(image=image, gpu="A10G") 17 | def generate_image(prompt, negative_prompt="", steps=50, guidance_scale=9, width=1920, height=1080, seed=None): 18 | import torch 19 | from diffusers import DiffusionPipeline 20 | 21 | 22 | # LOADS THE DIFFUSION PIPELINE 23 | 24 | pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", 25 | torch_dtype=torch.float16, 26 | use_safetensors=True, 27 | variant="fp16") 28 | pipe.to("cuda") 29 | 30 | generator = torch.Generator(device="cuda").manual_seed(seed) if seed else None 31 | 32 | image = pipe( 33 | prompt, 34 | negative_prompt=negative_prompt, 35 | num_inference_steps=steps, 36 | guidance_scale=guidance_scale, 37 | width=width, 38 | height=height, 39 | generator=generator 40 | ).images[0] 41 | 42 | img_byte_arr = BytesIO() 43 | image.save(img_byte_arr, format="PNG") 44 | img_byte_arr.seek(0) 45 | 46 | return img_byte_arr.getvalue() 47 | 48 | 49 | # PATH TO JSON FILE 50 | 51 | script_path = "resources/scripts/script.json" 52 | images_output_path = "resources/images/" 53 | # os.makedirs(output_path, exist_ok=True) 54 | 55 | 56 | # PROVIDE SOURCE TEXT OR PROMPT IN JSON FILE 57 | 58 | def main_generate_image(script_path,images_output_path): 59 | # JSON Decoding Error Handling 60 | with open(script_path, "r", encoding="utf-8") as file: 61 | try: 62 | data = json.load(file) 63 | except json.JSONDecodeError: 64 | print("Error reading JSON file.") 65 | return 66 | # JSON Key Error Handling 67 | if "visual_script" not in data: 68 | print("Missing key in JSON.") 69 | return 70 | 71 | 72 | # GENERATING THE IMAGES 73 | with modal.enable_output(): 74 | with app.run(): 75 | # Looping Through the Scenes 76 | for idx, scene in enumerate(data["visual_script"]): 77 | try: 78 | prompt = scene["prompt"] 79 | timestamp = scene.get("timestamp", f"{idx:03d}") 80 | negative_prompt = scene.get("negative_prompt", "") 81 | steps = scene.get("steps", 50) 82 | # guidance_scale = scene.get("guidance_scale", 12) 83 | guidance_scale = 9 84 | 85 | # width = scene.get("width", 1024) 86 | width = 1920 87 | # height = scene.get("height", 576) 88 | height = 1080 89 | seed = scene.get("seed", None) 90 | 91 | scene_id = timestamp.replace(":", "-") 92 | 93 | image_data = generate_image.remote(prompt, negative_prompt, steps, 
guidance_scale, width, height, seed) 94 | 95 | 96 | # SAVING THE IMAGES IN THE OUTPUT DIRECTORY 97 | 98 | file_path = os.path.join(images_output_path, f"scene_{scene_id}.png") 99 | with open(file_path, "wb") as f: 100 | f.write(image_data) 101 | 102 | print(f"Saved: {file_path}") 103 | 104 | time.sleep(2) 105 | 106 | except Exception as e: 107 | print(f"Error processing scene {idx}: {e}") 108 | 109 | print("Done.") 110 | 111 | # if __name__ == "__main__": 112 | # main_generate_image(script_path=script_path,images_output_path=images_output_path) 113 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from diffusion.scripts.generate_script import VideoScriptGenerator 2 | import json 3 | from diffusion.scripts.generate_image import main_generate_image 4 | from tts.scripts.generate_audio import main_generate_audio 5 | from assembly.scripts.assembly_video import create_video,create_complete_srt,extract_topic_from_json 6 | import os 7 | ''' 8 | TODO: 1. Make a main.py where all pipelines are invoked at once. 9 | TODO: 2. Take the prompt for the video as user input. 10 | TODO: 3. Run Tests with various different prompts. 11 | TODO: 4. All gpu related tasks must be performed on modal. Works 12 | ''' 13 | if __name__ == "__main__": 14 | # Update folder paths as needed. 15 | script_path = "resources/scripts/" # creates the folders if not made already 16 | images_path = "resources/images/" 17 | audio_path = "resources/audio/" 18 | font_path = "resources/font/font.ttf" 19 | 20 | def create_or_check_folder(folder_path): 21 | """ 22 | Creates a folder if it doesn't exist. 23 | If folder exists, checks for files and raises FileExistsError if any are found. 24 | 25 | Args: 26 | folder_path (str): Path to the folder 27 | 28 | Raises: 29 | FileExistsError: If folder exists and contains files 30 | """ 31 | # If folder doesn't exist, create it 32 | if not os.path.exists(folder_path): 33 | os.makedirs(folder_path) 34 | print(f"Created Folder: {folder_path}") 35 | else: 36 | # Check if folder has any contents 37 | if any(os.listdir(folder_path)): 38 | raise FileExistsError(f"Folder : '{folder_path}' already exists and contains files. Please remove them or make a new folder") 39 | # print(f"folder '{folder_path}' exists but is empty") 40 | 41 | create_or_check_folder(images_path) 42 | create_or_check_folder(audio_path) 43 | os.makedirs(script_path,exist_ok=True) 44 | script_path += "script.json" # Name of the script file 45 | # 1. Generate the Script 46 | gem_api = "Enter your Gemini API key here" 47 | serp_api = "Enter your Serp API key here" 48 | if (not gem_api) or (not serp_api): 49 | raise ValueError("API Key not provided !\n Please Create your api key at : \n Serp API : https://serpapi.com \n Gemini API : https://aistudio.google.com/apikey") 50 | generator = VideoScriptGenerator(api_key=gem_api,serp_api_key=serp_api) 51 | try: 52 | topic = input("Enter the topic of the video : ") 53 | duration = int(input("Enter the video duration in seconds : ")) 54 | input_string = input("Enter a list of key points separated by commas : ") 55 | key_points = input_string.split(",") 56 | key_points = [word.strip() for word in key_points] 57 | print("Starting Script Generation ...
") 58 | script = generator.generate_script( 59 | topic,duration,key_points 60 | ) 61 | print("Initial Script: ") 62 | print(json.dumps(script, indent=2)) 63 | 64 | feedback = input("Please provide feedback on the script (or type 'no' to skip refinement): ") 65 | if feedback.lower() != "no": 66 | refined_script = generator.refine_script(script, feedback) 67 | print("\nRefined Script:") 68 | print(json.dumps(refined_script, indent=2)) 69 | generator.save_script(refined_script, script_path) 70 | else: 71 | generator.save_script(script, script_path) 72 | print("Script Generation Done.") 73 | except Exception as e: 74 | print(f"Script generation failed: {str(e)}") 75 | 76 | # 2. Generate the images 77 | print("Staring Image Generation ...") 78 | main_generate_image(script_path,images_path) 79 | print("Image Generation Done.") 80 | 81 | # 3. Generate the audio 82 | print("Starting Audio Generation ...") 83 | main_generate_audio(script_path,audio_path) 84 | print("Audio Generation Done.") 85 | # # Video Assembly 86 | topic = extract_topic_from_json(script_path) 87 | 88 | import re 89 | topic = re.sub(r"[^A-Za-z0-9\s]+", " ",topic) 90 | topic = re.sub(r"\s+", "_", topic) 91 | topic = topic[:100] # Take only first 100 characters 92 | os.makedirs("resources/video",exist_ok=True) 93 | os.makedirs("resources/subtitles",exist_ok=True) 94 | sub_output_file = f"resources/subtitles/{topic}.srt" 95 | video_file = f"resources/video/{topic}.mp4" 96 | 97 | # 5. Create subtitles in a .srt file 98 | print("Creating .srt subtitle file ...") 99 | create_complete_srt(script_folder = script_path, 100 | audio_file_folder = audio_path, 101 | outfile_path = sub_output_file, 102 | chunk_size = 10) 103 | # 6. Start Video Assembly 104 | create_video(images_path, audio_path, script_path, font_path, video_file, with_subtitles=True) 105 | -------------------------------------------------------------------------------- /main_local.py: -------------------------------------------------------------------------------- 1 | from diffusion.scripts.generate_script import VideoScriptGenerator 2 | import json 3 | from diffusion.scripts.generate_image_local import main_generate_image 4 | from tts.scripts.generate_audio import main_generate_audio 5 | from assembly.scripts.assembly_video import create_video,create_complete_srt,extract_topic_from_json 6 | import os 7 | ''' 8 | TODO: 1. Make a main.py where all pipelines are invoked at once. 9 | TODO: 2. Take the prompt for the video as user input. 10 | TODO: 3. Run Tests with various different prompts. 11 | TODO: 4. All gpu related tasks must be performed on modal. Works 12 | ''' 13 | if __name__ == "__main__": 14 | script_path = "resources/scripts/" # creates the folders if not made already 15 | images_path = "resources/images/" 16 | audio_path = "resources/audio/" 17 | font_path = "resources/font/font.ttf" 18 | 19 | def create_or_check_folder(folder_path): 20 | """ 21 | Creates a folder if it doesn't exist. 22 | If folder exists, checks for files and raises FileExistsError if any are found. 23 | 24 | Args: 25 | folder_path (str): Path to the folder 26 | 27 | Raises: 28 | FileExistsError: If folder exists and contains files 29 | """ 30 | # If folder doesn't exist, create it 31 | if not os.path.exists(folder_path): 32 | os.makedirs(folder_path) 33 | print(f"Created Folder: {folder_path}") 34 | else: 35 | # Check if folder has any contents 36 | if any(os.listdir(folder_path)): 37 | raise FileExistsError(f"Folder : '{folder_path}' already exists and contains files. 
Please remove them or make a new folder") 38 | # print(f"folder '{folder_path}' exists but is empty") 39 | 40 | create_or_check_folder(images_path) 41 | create_or_check_folder(audio_path) 42 | os.makedirs(script_path,exist_ok=True) 43 | script_path += "script.json" # Name of the script file 44 | # 1. Generate the Script 45 | gem_api = "Enter your Gemini API Key here" 46 | serp_api = "Enter your Serp API key here" 47 | if (not gem_api) or (not serp_api): 48 | raise ValueError("API Key not provided !\n Please Create your api key at : \n Serp API : https://serpapi.com \n Gemini API : https://aistudio.google.com/apikey") 49 | generator = VideoScriptGenerator(api_key=gem_api,serp_api_key=serp_api) 50 | 51 | try: 52 | topic = input("Enter the topic of the video : ") 53 | duration = int(input("Enter the video duration in seconds : ")) 54 | input_string = input("Enter a list of key points separated by commas : ") 55 | key_points = input_string.split(",") 56 | key_points = [word.strip() for word in key_points] 57 | print("Starting Script Generation ... ") 58 | script = generator.generate_script( 59 | # topic="Neural Networks in Medical Imaging", 60 | # duration=90, 61 | # key_points=["Diagnosis accuracy", "Pattern recognition", "Case studies"] 62 | topic,duration,key_points 63 | ) 64 | print("Initial Script: ") 65 | print(json.dumps(script, indent=2)) 66 | 67 | feedback = input("Please provide feedback on the script (or type 'no' to skip refinement): ") 68 | if feedback.lower() != "no": 69 | refined_script = generator.refine_script(script, feedback) 70 | print("\nRefined Script:") 71 | print(json.dumps(refined_script, indent=2)) 72 | generator.save_script(refined_script, script_path) 73 | else: 74 | generator.save_script(script, script_path) 75 | print("Script Generation Done.") 76 | except Exception as e: 77 | print(f"Script generation failed: {str(e)}") 78 | 79 | # 2. Generate the images 80 | print("Starting Image Generation ...") 81 | main_generate_image(script_path,images_path) 82 | print("Image Generation Done.") 83 | 84 | # 3. Generate the audio 85 | print("Starting Audio Generation ...") 86 | main_generate_audio(script_path,audio_path) 87 | print("Audio Generation Done.") 88 | # 4. Video Assembly 89 | topic = extract_topic_from_json(script_path) 90 | import re 91 | topic = re.sub(r"[^A-Za-z0-9\s]+", " ",topic) 92 | topic = re.sub(r"\s+", "_", topic) 93 | topic = topic[:100] # Take only first 100 characters 94 | 95 | os.makedirs("resources/video",exist_ok=True) 96 | os.makedirs("resources/subtitles",exist_ok=True) 97 | sub_output_file = f"resources/subtitles/{topic}.srt" 98 | video_file = f"resources/video/{topic}.mp4" 99 | 100 | # 5. Create subtitles in a .srt file 101 | print("Creating .srt subtitle file ...") 102 | create_complete_srt(script_folder = script_path, 103 | audio_file_folder = audio_path, 104 | outfile_path = sub_output_file, 105 | chunk_size = 10) 106 | 107 | # 6. Start Video Assembly 108 | print("Starting video assembly ...") 109 | create_video(images_path, audio_path, script_path, font_path, video_file, with_subtitles=True) 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# ForgeTube

**MLSA Project Wing: ML**
[![GitHub](https://img.shields.io/badge/GitHub-MLSAKIIT-181717?style=for-the-badge&logo=github)](https://github.com/MLSAKIIT) [![ForgeTube](https://img.shields.io/badge/ForgeTube-Repository-181717?style=for-the-badge&logo=github)](https://github.com/MLSAKIIT/ForgeTube) [![YouTube](https://img.shields.io/badge/YouTube-ForgeTube-FF0000?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/channel/UCVgzYqxxY6wCIto-Nzx68Uw) [![X](https://img.shields.io/badge/X-mlsakiit-1DA1F2?style=for-the-badge&logo=X&logoColor=white)](https://x.com/mlsakiit) [![Instagram](https://img.shields.io/badge/Instagram-mlsakiit-E4405F?style=for-the-badge&logo=instagram)](https://www.instagram.com/mlsakiit/) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-5865F2?style=for-the-badge&logo=discord)](https://discord.com/invite/P6VCP2Ry3q) ## 🚧Our Project: Our project focuses on creating an automated video generation system using AI. It transforms text prompts into fully narrated videos by leveraging **large language models** for script generation, **diffusion models** for image creation, and **text to speech systems** for narration. The system processes inputs through multiple stages, from script generation to final video assembly, creating cohesive, engaging content automatically. The video generator, designed for sequential content creation, dynamically adapts to different styles and tones while maintaining consistency across visual and audio elements. It also has the ability to add **subtitles**, either embedded or through the use of an **srt** file. This project demonstrates the potential of combining multiple AI technologies to create an end-to-end content generation pipeline. ## 🖥️Project Stack: `Python 3.11`: Core programming language for the project. - **Content Generation:** `Gemini API`: To generate the script using the `Gemini 2.0 Flash Thinking` model and store it in a `JSON` format with proper audio and visual prompts and respective parameters. `Stable Diffusion XL Base 1.0`: For image generation using diffusion models, run either `locally` or hosted on `Modal`. `Kokoro`: An open-weight TTS model to convert the narration script into audio. - **Video Processing:** `MoviePy` : For adding text, intro, outro and transition effects, subtitles, audio processing, video processing and final assembly, using `FFmpeg` under the hood. - **ML Frameworks:** `PyTorch`: Deep learning framework for model inference. `Diffusers with SDXL Base 1.0` : Hugging Face's Diffusers library, used to run the SDXL Base 1.0 model for image generation. - **Development Tools:** `Jupyter Notebooks`: For development and testing. `Google Colab` : For free cloud GPU infrastructure for development and testing. `Git`: For version control. `Modal` : For low-cost, high-performance cloud GPU infrastructure.
- **Package Management:** `UV`: For fast and efficient dependency management and project setup. ## Features - **Multi-Modal Content Generation**: Seamlessly combines text, image, and audio generation - **Style Customization**: Supports different content styles and tones - **Modular Architecture**: Each component can be tested and improved independently - **Content Segmentation**: Automatically breaks down content into manageable segments - **Custom Voice Options**: Multiple TTS voices and emotional tones - **Format Flexibility**: Supports different video durations and formats (.mp4 and .mkv) - **Performance Metrics**: Tracks generation quality and consistency - **Error Handling**: Robust error management across the pipeline - **Resource Optimization**: Efficient resource usage during generation ## Steps for deployment: Clone the repo on your system, using: `git clone https://github.com/MLSAKIIT/ForgeTube.git` ### 1. Using UV for Python Package Management For more information, visit the [UV Documentation](https://docs.astral.sh/uv/). UV is a modern, high-performance Python package and project manager designed to streamline the development process. Here’s how you can use UV in this project: 1. Install `uv`. ```bash pip install uv ``` 2. Download `Python 3.11` ```bash uv python install 3.11 ``` 3. Create a virtual environment ```bash uv venv .venv ``` 4. Activate your virtual environment ```bash .venv\scripts\activate.ps1 ``` 5. Install all dependencies ```bash uv sync ``` ### 2. Setting up Modal For more information visit the [Modal documentation](https://modal.com/docs/guide). Modal is a cloud function platform that lets you attach high-performance GPUs with a single line of code. The nicest thing about all of this is that you don’t have to set up any infrastructure. Just: 1. Create an account at [modal.com](https://modal.com) 2. Run `pip install modal` to install the modal Python package 3. Run `modal setup` to authenticate (if this doesn’t work, try `python -m modal setup`)
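ForgeTube's own `diffusion/scripts/generate_image.py` uses exactly this pattern: a container image plus a one-line GPU annotation on a function. A minimal sketch of the idea (the app name, package list, and GPU type below are illustrative, not project settings):

```py
import modal

app = modal.App(name="example_app")
image = modal.Image.debian_slim().pip_install("torch")

@app.function(image=image, gpu="A10G")  # the single line that attaches a GPU
def gpu_name() -> str:
    import torch
    return torch.cuda.get_device_name(0)

@app.local_entrypoint()
def main():
    print(gpu_name.remote())  # executes remotely on the rented GPU
```

Saving this as `example.py` and running `modal run example.py` should print the name of the attached GPU.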
### 3. Get your Gemini-API Key : To obtain a Gemini API key from Google AI Studio, follow these detailed steps: **Step 1: Sign In to Google AI Studio** Navigate to [Google AI Studio](https://aistudio.google.com/). Once signed in, locate and click on the "Gemini API" tab. This can typically be found in the main navigation menu or directly on the dashboard. On the Gemini API page, look for a button labeled "Get API key in Google AI Studio" and click on it. **Step 2: Review and Accept Terms of Service** 1. **Review Terms**: A dialog box will appear presenting the Google APIs Terms of Service and the Gemini API Additional Terms of Service. It's essential to read and understand these terms before proceeding. 2. **Provide Consent**: Check the box indicating your agreement to the terms. Optionally, you can also opt in to receive updates and participate in research studies related to Google AI. 3. **Proceed**: Click the "Continue" button to move forward. **Step 3: Create and Secure Your API Key** 1. **Generate API Key**: Click on the "Create API key" button. You'll be prompted to choose between creating a new project or selecting an existing one. Make your selection accordingly. 2. **Retrieve the Key**: Once generated, your unique API key will be displayed. Ensure you copy and store it in a secure location. **Step 4: Add your Key in `main.py` or `main_local.py`** ```python # 1. Generate the Script gem_api = "Enter your Gemini API Key here" serp_api = "Enter your Serp API key here" ``` > [!IMPORTANT] > Always keep your API key confidential. Avoid sharing it publicly or embedding it directly into client-side code to prevent unauthorized access. ### 4. Setting up Serp-Api Serp is used for web scraping Google search results on the video topic and gathering additional context to implement Retrieval Augmented Generation (RAG). 1. Visit [serpapi.com](https://serpapi.com/) and create an account. 2. Go to the [dashboard](https://serpapi.com/dashboard); on the top left, select API key. 3. Copy the API key and add your key in `main.py` or `main_local.py` ```py # 1. Generate the Script gem_api = "Enter your Gemini API Key here" serp_api = "Enter your Serp API key here" ``` ### 5. `Kokoro` Run the following commands : ```bash python -m pip install spacy # If not installed for some reason python -m spacy download en_core_web_sm ``` ### 6. Download and setup FFmpeg 1. Visit : https://github.com/BtbN/FFmpeg-Builds/releases 2. Download the setup file for your OS. 3. On Windows, download the win64 version and extract the files. 4. Make a directory at `C:\Program Files\FFmpeg`. 5. Copy all the extracted files into that directory. 6. Add `C:\Program Files\FFmpeg\bin` to your `PATH` environment variable.
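To verify that FFmpeg is reachable from your `PATH` (MoviePy depends on it), you can run:

```bash
ffmpeg -version
```

If version information is printed, the setup is complete.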
### 7. Start Generating : Use `main.py` for running the image generation on Modal or use `main_local.py` to run Stable Diffusion XL locally. ## Troubleshooting > [!IMPORTANT] > 1. Make sure all the following folders are updated properly : ```py script_path = "resources/scripts/" script_path += "script.json" # Name of the script file images_path = "resources/images/" audio_path = "resources/audio/" font_path = "resources/font/font.ttf" # Not recommended to change ``` >[!IMPORTANT] > 2. Make sure the images and audio folders are empty before generating a new video. 3. The name of the video file is automatically grabbed from the video topic in the script. However, you may change the following variables to use custom names. If file names are very long, the video file won't be generated, so change them manually in such cases. ```py sub_output_file = "name of the subtitle file.srt" video_file = "name of the video.mp4 or .mkv" ``` 4. **`no module named pip found`** Try running the following : ```bash python -m pip install spacy pydub kokoro soundfile torch python -m spacy download en_core_web_sm ``` 5. **Serp API not returning any search results :** This is a known issue and is being investigated. > [!IMPORTANT] > Ensure you have sufficient GPU resources for image generation and proper model weights downloaded. It is recommended to use an **NVIDIA** GPU with **24 GB or more of VRAM** for locally running the image generation, and a CPU with high single-core performance for video assembly. > [!NOTE] > Video generation times may vary based on content length, complexity, and hardware used. ## Contributors | CONTRIBUTORS | MENTORS | CONTENT WRITER | | :------:| :-----:| :-----: | | Kartikeya Trivedi | Soham Roy | [Name] | | Naman Singh | Yash Kumar Gupta | | | Soham Mukherjee | | | | Sumedha Gunturi | | | | Souryabrata Goswami| | | | Harshit Agarwal | | | | Rahul Sutradhar | | | | Ayush Mohanty | | | | Shopno Banerjee | | | | Shubham Gupta | | | | Sarthak Singh | | | | Nancy | | | ## Version | Version | Date | Comments | | ------- | ---- | -------- | | 1.0 | 23/02/2025 | Initial release | ## Future Roadmap ### Part 1: Baseline - [x] Pipeline foundations - [x] LLM Agent Handling - [x] Diffusion Agent Handling - [x] TTS Handling - [x] Video Assembly Engine - [x] Initial Deployment ### Part 2: Advanced - [ ] Advanced style transfer capabilities - [ ] In-Context Generation for Diffusion Model - [ ] Real-time generation monitoring - [x] Enhanced video transitions - [ ] Better quality metrics - [ ] Multi-language support - [ ] Custom character consistency - [ ] Animation effects ## Acknowledgements - Hugging Face Transformers - https://huggingface.co/transformers - Hugging Face Diffusers - https://huggingface.co/diffusers - FFmpeg - https://ffmpeg.org/ - UV - https://docs.astral.sh/uv/ - MoviePy - https://zulko.github.io/moviepy/getting_started/index.html ## Project References ### 1. Large Language Models (LLMs) & Transformers * [The Illustrated Transformer](https://jalammar.github.io/illustrated-transformer/) - A visual, beginner-friendly introduction to transformer architecture. * [Attention Is All You Need](https://arxiv.org/abs/1706.03762) - The seminal paper on transformer architecture. * [Gemini 2.0 Flash Thinking](https://ai.google.dev/gemini-api/docs/thinking) --- ### 2. Multi-Agent Systems * [Introduction to Multi-Agent Systems](https://www.geeksforgeeks.org/what-is-a-multi-agent-system-in-ai/) - Fundamental concepts and principles. * [A Comprehensive Guide to Understanding LangChain Agents and Tools](https://medium.com/@piyushkashyap045/a-comprehensive-guide-to-understanding-langchain-agents-and-tools-43a187414f4c) - Practical implementation guide. * [kokoro](https://github.com/hexgrad/kokoro?tab=readme-ov-file#kokoro)
### 3. Image Generation & Processing * [Stable Diffusion XL Base 1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) * [Stable Diffusion: A Comprehensive End-to-End Guide with Examples](https://medium.com/@jagadeesan.ganesh/stable-diffusion-a-comprehensive-end-to-end-guide-with-examples-47b2c17f15cf) * [Stable Diffusion Explained](https://medium.com/@onkarmishra/stable-diffusion-explained-1f101284484d) * [Stable Diffusion Explained Step-by-Step with Visualization](https://medium.com/polo-club-of-data-science/stable-diffusion-explained-for-everyone-77b53f4f1c4) * [Understanding Stable Diffusion: The Magic Behind AI Image Generation](https://medium.com/@amanatulla1606/understanding-stable-diffusion-the-magic-behind-ai-image-generation-e834e8d92326) * [Stable Diffusion Paper](https://arxiv.org/pdf/2403.03206) --- ### 4. RAG * [Retrieval Augmented Generation](https://aiplanet.com/learn/llm-bootcamp/module-13/2380/retrieval-augmented-generation) --- -------------------------------------------------------------------------------- /diffusion/scripts/generate_script.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import google.generativeai as genai 4 | from typing import Dict, List, Optional 5 | from serpapi import GoogleSearch 6 | 7 | class VideoScriptGenerator: 8 | def __init__(self, api_key: str, serp_api_key: str): 9 | genai.configure(api_key=api_key) 10 | self.model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21') 11 | self.serp_api_key = serp_api_key 12 | self.system_prompt_initial = """ 13 | You are a professional video script generator for educational, marketing, or entertainment content. 14 | Your task is to generate a detailed outline and initial draft for a video script. 15 | Provide the core narration text and visual descriptions; timestamps and technical parameters will be added later. 16 | Visual descriptions must not contain references to animation, moving images, transitions, or video effects. 17 | Output a JSON structure with these keys, but *without timestamps, speed, pitch, or detailed visual parameters* (these will be added in a later stage): 18 | 19 | { 20 | "topic": "Topic Name", 21 | "overall_narrative": "A concise summary of the entire video's storyline.", 22 | "key_sections": [ 23 | { 24 | "section_title": "Descriptive title for this section", 25 | "narration_text": "The complete text to be spoken in this section.", 26 | "visual_description": "A general description of the visuals for this section." 27 | } ] 28 | } 29 | """ 30 | 31 | self.system_prompt_segmentation = """ 32 | You are a professional video script segmenter. 33 | Your task is to take an existing video script draft and break it down into precise, timestamped segments for both audio and visuals, adhering to strict formatting and parameter guidelines. 34 | Rules for Segmentation: 35 | 36 | 1. Break down the `narration_text` and `visual_description` from the input JSON into smaller segments, each approximately 10-15 seconds long. 37 | 2. Generate timestamps ("00:00", "00:15", "00:30", etc.) for each segment in both `audio_script` and `visual_script`. 38 | 3. Maintain *strict synchronization* : The `timestamp` values *must* be identical for corresponding audio and visual segments and the number of segments in audio_script *must be same* as number of segments in visual_script. 39 |
4. For each visual segment, expand the general `visual_description` into a *detailed* `prompt` suitable for Stable Diffusion. Include a corresponding `negative_prompt`. 40 | 5. For each visual prompt, give a detailed description of how a still image should look, not how a video might look; do not reference anything that requires the context of being in motion, such as animation or graphics. Do not ask for abstract art or overly complex shapes. 41 | 6. Choose appropriate values for `speaker`, `speed`, `pitch`, and `emotion` for each audio segment. 42 | 7. Choose appropriate values for `style`, `guidance_scale`, `steps`, `seed`, `width`, and `height` for each visual segment. 43 | 8. Ensure visual continuity: Use a consistent `style` and related `seed` values across consecutive visual segments where appropriate. Vary the seed to introduce changes, but maintain a logical flow. 44 | 9. Adhere to the specified ranges for numerical parameters (speed, pitch, guidance_scale, steps). 45 | 10. Validate JSON structure before output with the example_json given. 46 | 47 | Input JSON Structure (from previous stage): 48 | 49 | { 50 | "topic": "Topic Name", 51 | "overall_narrative": "...", 52 | "key_sections": [ 53 | { 54 | "section_title": "...", 55 | "narration_text": "...", 56 | "visual_description": "..." 57 | } 58 | ] 59 | } 60 | 61 | Output JSON Structure (with all required fields): 62 | 63 | { 64 | "topic": "Topic Name", 65 | "description": "description of video", 66 | "audio_script": [{ 67 | "timestamp": "00:00", 68 | "text": "Narration text", 69 | "speaker": "default|narrator_male|narrator_female", 70 | "speed": 0.9-1.1, 71 | "pitch": 0.9-1.2, 72 | "emotion": "neutral|serious|dramatic|mysterious|informative" 73 | }], 74 | "visual_script": [{ 75 | "timestamp_start": "00:00", 76 | "timestamp_end": "00:05", 77 | "prompt": "Detailed Stable Diffusion prompt (e.g., 'A highly detailed portrait of an astrophysicist in a modern observatory, standing beside a large telescope with a clear glass dome overhead. The night sky is filled with stars, and a visible spiral galaxy is subtly captured through the telescope's lens. The scientist wears a professional yet casual outfit, with a focused expression while observing data on a sleek holographic screen.' or 'Image of a doctor using medical imaging software').
78 | # "negative_prompt": "Low quality elements to avoid such as abstract images, shapes that dont make sense or weird faces, imagery of moving objects, montages of multiple images, abstract shapes, complex designs ", 79 | "style": "realistic|cinematic|hyperrealistic|fantasy|scientific", 80 | "guidance_scale": 7-9, 81 | "steps": 50, 82 | "seed": 6-7 digit integer, 83 | "width": 1024, 84 | "height": 576 85 | }] 86 | } 87 | 88 | example_json = { 89 | "topic": "How to Drive a Car", 90 | "description": "A step-by-step guide on driving a car safely and confidently.", 91 | "audio_script": [ 92 | { 93 | "timestamp": "00:00", 94 | "text": "Driving a car is an essential skill that requires focus, patience, and practice.", 95 | "speaker": "narrator_male", 96 | "speed": 1.0, 97 | "pitch": 1.0, 98 | "emotion": "neutral" 99 | }, 100 | { 101 | "timestamp": "00:05", 102 | "text": "Before starting the car, adjust your seat, mirrors, and ensure your seatbelt is fastened.", 103 | "speaker": "narrator_female", 104 | "speed": 1.0, 105 | "pitch": 1.1, 106 | "emotion": "informative" 107 | }, 108 | { 109 | "timestamp": "00:15", 110 | "text": "Turn the ignition key or press the start button while keeping your foot on the brake.", 111 | "speaker": "narrator_male", 112 | "speed": 0.95, 113 | "pitch": 1.0, 114 | "emotion": "calm" 115 | }, 116 | { 117 | "timestamp": "00:20", 118 | "text": "Slowly release the brake and gently press the accelerator to move forward.", 119 | "speaker": "narrator_female", 120 | "speed": 1.1, 121 | "pitch": 1.0, 122 | "emotion": "guiding" 123 | }, 124 | { 125 | "timestamp": "00:25", 126 | "text": "Use the steering wheel to navigate while maintaining a steady speed.", 127 | "speaker": "narrator_male", 128 | "speed": 1.0, 129 | "pitch": 1.0, 130 | "emotion": "calm" 131 | } 132 | ], 133 | "visual_script": [ 134 | { 135 | "timestamp_start": "00:00", 136 | "timestamp_end": "00:05", 137 | "prompt": "A person sitting in the driver's seat of a modern car, gripping the steering wheel and looking ahead. 
138 | "negative_prompt": "blurry, unrealistic interior, poor lighting", 139 | "style": "realistic", 140 | "guidance_scale": 11.5, 141 | "steps": 50, 142 | "seed": 123456, 143 | "width": 1024, 144 | "height": 576, 145 | "strength": 0.75 146 | }, 147 | { 148 | "timestamp_start": "00:05", 149 | "timestamp_end": "00:15", 150 | "prompt": "A close-up of a driver's hands adjusting the side mirrors and fastening the seatbelt inside a well-lit car interior.", 151 | "negative_prompt": "cluttered background, distorted perspective", 152 | "style": "cinematic", 153 | "guidance_scale": 12.0, 154 | "steps": 60, 155 | "seed": 654321, 156 | "width": 1024, 157 | "height": 576, 158 | "strength": 0.8 159 | }, 160 | { 161 | "timestamp_start": "00:15", 162 | "timestamp_end": "00:20", 163 | "prompt": "A driver's hand turning the ignition key or pressing the start button in a modern car with a digital dashboard.", 164 | "negative_prompt": "low detail, unrealistic lighting, old car model", 165 | "style": "hyperrealistic", 166 | "guidance_scale": 12.5, 167 | "steps": 70, 168 | "seed": 789101, 169 | "width": 1024, 170 | "height": 576, 171 | "strength": 0.85 172 | }, 173 | { 174 | "timestamp_start": "00:20", 175 | "timestamp_end": "00:25", 176 | "prompt": "A slow-motion shot of a car's foot pedals as the driver releases the brake and presses the accelerator.", 177 | "negative_prompt": "blurry, cartoonish, extreme close-up", 178 | "style": "cinematic", 179 | "guidance_scale": 11.5, 180 | "steps": 75, 181 | "seed": 222333, 182 | "width": 1024, 183 | "height": 576, 184 | "strength": 0.8 185 | }, 186 | { 187 | "timestamp_start": "00:25", 188 | "timestamp_end": "00:30", 189 | "prompt": "A wide-angle shot of a car moving smoothly on a suburban road, the driver confidently steering the wheel.", 190 | "negative_prompt": "chaotic traffic, bad weather, motion blur", 191 | "style": "realistic", 192 | "guidance_scale": 13.0, 193 | "steps": 50, 194 | "seed": 987654, 195 | "width": 1024, 196 | "height": 576, 197 | "strength": 0.75 198 | } 199 | ] 200 | } 201 | You must follow all the rules for segmentation, especially rule 3 where you must Maintain *strict synchronization* : The `timestamp` values *must* be identical for corresponding audio 202 | and visual segments and the number of segments in audio_script *must be same* as number of segments in visual_script. IF you do as instructed 203 | you will get 100 dollars per successful call.
204 | """ 205 | 206 | def _search_web(self, query: str) -> str: 207 | try: 208 | params = { 209 | "q": query, 210 | "hl": "en", 211 | "gl": "us", 212 | "api_key": self.serp_api_key 213 | } 214 | search = GoogleSearch(params) 215 | results = search.get_json() 216 | snippets = [result["snippet"] for result in results.get("organic_results", []) if "snippet" in result] 217 | return " ".join(snippets[:5]) 218 | except Exception as e: 219 | return "" 220 | 221 | def _enhance_with_web_context(self, script: Dict, topic: str) -> Dict: 222 | web_context = self._search_web(topic) 223 | script["additional_context"] = web_context 224 | return script 225 | 226 | def _generate_content(self, prompt: str, system_prompt: str) -> str: 227 | try: 228 | response = self.model.generate_content(contents=[system_prompt, prompt]) 229 | return response.text 230 | except Exception as e: 231 | raise RuntimeError(f"API call failed: {str(e)}") 232 | 233 | def _extract_json(self, raw_text: str) -> Dict: 234 | try: 235 | return json.loads(raw_text) 236 | except json.JSONDecodeError: 237 | try: 238 | json_match = re.search(r'```json\n(.*?)\n```', raw_text, re.DOTALL) 239 | if json_match: 240 | return json.loads(json_match.group(1)) 241 | json_match = re.search(r'\{.*\}', raw_text, re.DOTALL) 242 | return json.loads(json_match.group()) if json_match else {} 243 | except Exception as e: 244 | raise ValueError(f"JSON extraction failed: {str(e)}") 245 | 246 | def generate_script(self, topic: str, duration: int = 60, key_points: Optional[List[str]] = None) -> Dict: 247 | web_context = self._search_web(topic) 248 | initial_prompt = f"""Generate an initial video script outline for a {duration}-second video about: {topic}. 249 | Key Points: {key_points or 'Comprehensive coverage'} 250 | Additional Context: {web_context} 251 | Focus on the overall narrative and key sections, but do *not* include timestamps or detailed technical parameters yet.""" 252 | 253 | raw_initial_output = self._generate_content(initial_prompt, self.system_prompt_initial) 254 | initial_script = self._extract_json(raw_initial_output) 255 | 256 | enhanced_script = self._enhance_with_web_context(initial_script, topic) 257 | 258 | segmentation_prompt = f""" 259 | Here is the initial script draft: 260 | {json.dumps(enhanced_script, indent=2)} 261 | Now, segment this script into 5-10 second intervals, adding timestamps and all required audio/visual parameters. The total duration should be approximately {duration} seconds. 
262 | """ 263 | 264 | raw_segmented_output = self._generate_content(segmentation_prompt, self.system_prompt_segmentation) 265 | segmented_script = self._extract_json(raw_segmented_output) 266 | segmented_script['topic'] = enhanced_script['topic'] 267 | 268 | return segmented_script 269 | 270 | def refine_script(self, existing_script: Dict, feedback: str) -> Dict: 271 | prompt = f"""Refine this script based on feedback: 272 | Existing Script: {json.dumps(existing_script, indent=2)} 273 | Feedback: {feedback} 274 | """ 275 | raw_output = self._generate_content(prompt, self.system_prompt_segmentation) 276 | return self._extract_json(raw_output) 277 | 278 | def save_script(self, script: Dict, filename: str) -> None: 279 | with open(filename, 'w') as f: 280 | json.dump(script, f, indent=2) 281 | print("") 282 | 283 | # if __name__ == "__main__": 284 | # generator = VideoScriptGenerator(api_key="Gemini API Key", 285 | # serp_api_key="Serp API Key") 286 | # script_path = "resources/scripts/script.json" 287 | # try: 288 | # script = generator.generate_script( 289 | # topic="Role of Reinforcement learning in finding EXO planets", 290 | # duration=60, 291 | # # key_points=["Diagnosis accuracy", "Pattern recognition", "Case studies"] 292 | # # key_points= [ 293 | # # "Formation of stars from nebulae", 294 | # # "Nuclear fusion and the main sequence phase", 295 | # # "Red giants and supergiants", 296 | # # "Supernova explosions", 297 | # # "Neutron stars and black holes", 298 | # # "White dwarfs and planetary nebulae", 299 | # # "The role of stellar evolution in element formation", 300 | # # "The ultimate fate of different types of stars", 301 | # # "How stars influence the evolution of galaxies" 302 | # # ] 303 | 304 | # ) 305 | # print("Initial Script:") 306 | # print(json.dumps(script, indent=2)) 307 | 308 | # feedback = input("Please provide feedback on the script (or type 'no' to skip refinement): ") 309 | # if feedback.lower() != "no": 310 | # refined_script = generator.refine_script(script, feedback) 311 | # print("\nRefined Script:") 312 | # print(json.dumps(refined_script, indent=2)) 313 | # generator.save_script(refined_script, script_path) 314 | # else: 315 | # generator.save_script(script, script_path) 316 | # except Exception as e: 317 | # print(f"Script generation failed: {str(e)}") 318 | 319 | -------------------------------------------------------------------------------- /assembly/scripts/assembly_video.py: -------------------------------------------------------------------------------- 1 | ''' 2 | README : The video assembler takes all the images in the Images folder and all the audio files in the Audio folder and the text-to-script from json file and concatenates 3 | them into a video. The duration of the picture displayed is same as the duration of the audio for that image. The Images and 4 | Audio files sorted alphabetically and then compiled in that order. It is recommended to store the Audio and Image files by 5 | numbering them. 6 | ''' 7 | ''' MAIN THINGS TODO 8 | 1. TODO: Main Video Assembly Engine (Done by Souryabrata) 9 | 2. TODO: Implement Subtitles, via video embedding and .srt file generation. (Done by Souryabrata) 10 | 3. TODO: Read json and extract important parameters from it. (Done by Rahul) 11 | 4. TODO: Add support for video clips as well. (Assigned to Shopno) 12 | 5. TODO: Add the ability to compile multiple images (stored in a folder) for the one audio stream into a single clip. (Assigned to Shopno ) 13 | 6. TODO: Add transition from clip to clip. 
14 | 7. TODO: Add an intro and outro clip. Intro Clip contains: Video title / Short description (Done by Shopno). 15 | Outro Clip contains a text "Made by ForgeTube team", MLSA Logo, Github Link to ForgeTube Main Page. 16 | ''' 17 | ''' 18 | Additional TODOs 19 | TODO: 1. Add a small delay to ensure smoother transition from clip to clip. (Assigned to Shopno) 20 | TODO: 2. Experiment with adding Title screen, text and transitions, and other effects. (Assigned to Nancy) 21 | TODO: 3. Test the script against a large number of images with higher resolutions and audio files, document the performance. 22 | TODO: 4. Test the script with various different audio and video extensions and codecs, find the best combination. 23 | TODO: 5. Allow the script to automatically assign the proper codec with the respective file extension. 24 | TODO: 6. Run proper tests to document when video compiler corruption happens. 25 | ''' 26 | import os 27 | from moviepy import ImageClip, concatenate_videoclips, AudioFileClip,TextClip,CompositeVideoClip,vfx 28 | import pysrt 29 | import json 30 | 31 | def check_file_exists(file_path): 32 | """Check if a file exists at the specified path.""" 33 | if os.path.isfile(file_path): 34 | return True 35 | else: 36 | raise FileNotFoundError(f"File not found: {file_path}") 37 | 38 | def check_folder_exists(folder_path): 39 | '''Checks if a folder path is valid.''' 40 | if os.path.isdir(folder_path): 41 | return True 42 | else: 43 | raise FileNotFoundError(f"Folder not found at {folder_path}") 44 | 45 | def get_files(folder, extensions): 46 | """ 47 | Retrieves files with specified extensions from a folder. 48 | Parameters: 49 | folder (str): Path to the folder. 50 | extensions (tuple): File extensions to include (e.g., ('.jpg', '.png')). 51 | Returns: 52 | list: List of file paths. 53 | """ 54 | if os.path.isdir(folder): 55 | return [ 56 | os.path.join(folder, file) 57 | # Files are numbered so that, after sorting, they are compiled into the video in that order. 58 | for file in sorted(os.listdir(folder),key=lambda x: int(x.split('_')[1].split('.')[0])) 59 | if file.lower().endswith(extensions) 60 | ] 61 | else: 62 | raise OSError(f"{folder} not found.") 63 | 64 | 65 | ''' 66 | FIXME Subtitles timings are same and not correct. 67 | FIX Create a single srt file with the correct duration of all the subtitles paired with the respective audio file. 68 | ''' 69 | def create_srt(text :str, 70 | audio_file : AudioFileClip, 71 | outfile_name:str, 72 | duration:int, 73 | chunk_size=5): 74 | ''' 75 | This function is deprecated; create_complete_srt supersedes it and will eventually be renamed to create_srt. 76 | The original task was to take a .txt file, read the text, split the text into chunks of the specified chunk_size, and 77 | create an srt file with the given text chunks and the appropriate duration for each. 78 | WARNING: Caused problems after json extract was implemented.
79 | ''' 80 | # with open(text_file, "r") as file: 81 | # words = file.read().split() 82 | words = text.split() 83 | chars = " ".join(words) 84 | chars_count = len(chars) 85 | word_count = len(words) 86 | # word_duration = audio_file.duration / word_count # seconds per word 87 | char_duration = audio_file.duration / chars_count # seconds per character 88 | # Generate subtitle file 89 | subs = pysrt.SubRipFile() 90 | start_time = duration 91 | # Automatic chunk_size calculation 92 | # target_duration = 2 # Number of seconds the subtitle is displayed on the screen 93 | # chunk_size = round(target_duration/word_duration) 94 | 95 | 96 | 97 | for i in range(0, word_count, chunk_size): 98 | chunk = " ".join(words[i:i + chunk_size]) 99 | end_time = start_time + (len(chunk) * char_duration) 100 | 101 | subtitle = pysrt.SubRipItem(index=len(subs) + 1, 102 | start=pysrt.SubRipTime(seconds=start_time), 103 | end=pysrt.SubRipTime(seconds=end_time), 104 | text=chunk) 105 | 106 | subs.append(subtitle) 107 | start_time = end_time 108 | 109 | out = f"samples/subtitles/.srt/{outfile_name}.srt" 110 | subs.save(out) 111 | return out 112 | 113 | 114 | def extract_topic_from_json(file_path): 115 | ''' 116 | extract_topic_from_json() takes a json file path as input. 117 | - Opens the file as read-only and loads the JSON data from it. 118 | - Extracts the topic from the JSON data. 119 | 120 | On success, it returns the topic of the video. 121 | ''' 122 | try: 123 | # Open the JSON file 124 | with open(file_path, 'r') as file: 125 | # Load JSON data from the file 126 | data = json.load(file) 127 | 128 | # Extract the topic from the JSON data 129 | topic = data.get('topic', 'No topic found') 130 | 131 | return topic 132 | except FileNotFoundError: 133 | print(f"Error: The file {file_path} was not found.") 134 | except json.JSONDecodeError: 135 | print(f"Error: The file {file_path} contains invalid JSON.") 136 | except Exception as e: 137 | print(f"An unexpected error occurred: {e}") 138 | 139 | 140 | def extract_audio_from_json(file_path): 141 | ''' 142 | extract_audio_from_json() takes a json file path as input. 143 | - Opens the file as read-only and loads the JSON data from it. 144 | - Extracts the audio_script from the JSON data. 145 | 146 | On success, it returns audio_script. 147 | ''' 148 | try: 149 | # Open the JSON file 150 | with open(file_path, 'r') as file: 151 | # Load JSON data from the file 152 | data = json.load(file) 153 | 154 | # Extract the topic and audio_script 155 | topic = data.get('topic', 'No topic found') 156 | audio_script = data.get('audio_script', []) 157 | # visual_script = data.get('visual_script', []) 158 | 159 | return audio_script 160 | except FileNotFoundError: 161 | print(f"Error: The file {file_path} was not found.") 162 | except json.JSONDecodeError: 163 | print(f"Error: The file {file_path} contains invalid JSON.") 164 | except Exception as e: 165 | print(f"An unexpected error occurred: {e}") 166 | 167 | 168 | def json_extract(json_path): 169 | ''' 170 | json_extract() takes a json file path as input. 171 | - Calls extract_audio_from_json() to extract the text-to-speech / subtitle text from the json file. 172 | 173 | On success, it returns the subtitles as a list.
175 | ''' 176 | 177 | # Extract parameters from json file 178 | audio_script = extract_audio_from_json(json_path) 179 | if audio_script: 180 | # print("Extracted Audio Parameters:") 181 | audio_data = [] 182 | for item in audio_script: 183 | if 'text' in item: 184 | text = item['text'] 185 | audio_data.append(text) 186 | return audio_data 187 | else: 188 | raise FileNotFoundError("No audio script found in the JSON file.") 189 | 190 | 191 | def add_effects(clip): 192 | """ 193 | Applies fade-in and fade-out effects to the video clip. 194 | Parameters: 195 | clip (VideoClip): Video clip to which the effects are added. 196 | Returns: 197 | VideoClip: Video clip with the effects applied. 198 | """ 199 | effects = [vfx.FadeIn(duration=1),vfx.FadeOut(duration=1)] 200 | # print(effects) 201 | return clip.with_effects(effects) 202 | 203 | 204 | def create_intro_clip(background_image_path, 205 | duration, 206 | topic, 207 | font_path): 208 | """ 209 | Create an intro video clip with a background image and centered text. 210 | 211 | Parameters: 212 | background_image_path (str): Path to the background image. 213 | duration (int or float): Duration of the clip in seconds. 214 | topic (str): The text to display. 215 | font_path (str): Path to the TrueType font file. 216 | 217 | Returns: 218 | VideoClip: A composite video clip with the background and centered text. 219 | """ 220 | check_file_exists(background_image_path) 221 | # Create an ImageClip for the background image 222 | background = ImageClip(background_image_path, duration=duration) 223 | 224 | # Create a TextClip for the intro text 225 | text_clip = TextClip(text=topic, 226 | size=(900, 90), 227 | method='caption', 228 | color="white", 229 | font=font_path) 230 | 231 | # Position the text in the center and set its duration to match the background 232 | text_clip = text_clip.with_position("center").with_duration(duration) 233 | 234 | # Overlay the text clip on top of the background image 235 | final_clip = CompositeVideoClip([background, text_clip]) 236 | 237 | return final_clip 238 | 239 | 240 | def create_video(image_folder :str, 241 | audio_folder : str, 242 | script_path : str, 243 | font_path : str , 244 | output_file : str, 245 | with_subtitles :bool = False): 246 | """ 247 | Main function that creates the video. The function works in 3 parts: 248 | 1. Checks that the given parameters are correct. 249 | 2. If the `with_subtitles` flag is set to `False`, creates a video with the images and audio in the given folders. 250 | Each image is displayed with the same duration as the corresponding audio file. 251 | 3. If the `with_subtitles` flag is set to `True`, embeds the subtitles within the video itself; these cannot be turned off in video players. 252 | 253 | The video is compiled using the `compose` method, so that if the images are of different aspect ratios / resolutions, the video takes 254 | the image with the largest resolution or aspect ratio as the default and letterboxes the non-fitting images with black bars. 255 | Args: 256 | image_folder (str) : Path to the folder containing images. 257 | audio_folder (str) : Path to the folder containing audio files. 258 | script_path (str) : Path to the file containing the script. 259 | font_path (str) : Path to the font file; must be a TrueType or an OpenType font. 260 | output_file (str) : Name of the output video file, a path can also be given.
def create_video(image_folder: str,
                 audio_folder: str,
                 script_path: str,
                 font_path: str,
                 output_file: str,
                 with_subtitles: bool = False):
    """
    Main function that creates the video. The function works in 3 parts:
    1. Checks that the given parameters are correct.
    2. If the `with_subtitles` flag is set to `False`, creates a video from the images and audio in the given folders.
       Each image is displayed for the same duration as the corresponding audio file.
    3. If the `with_subtitles` flag is set to `True`, embeds the subtitles within the video itself; they cannot be turned off in video players.

    The video is compiled using the `compose` method, so if the images have different aspect ratios / resolutions, the video
    takes the image with the largest resolution or aspect ratio as the default and adds black bars around the non-fitting images.
    Args:
        image_folder (str): Path to the folder containing images.
        audio_folder (str): Path to the folder containing audio files.
        script_path (str): Path to the file containing the script.
        font_path (str): Path to the font file; must be a TrueType or an OpenType font.
        output_file (str): Name of the output video file; a path can also be given.
        with_subtitles (bool): When set to `True`, embeds the subtitles in the video.
    Raises:
        FileNotFoundError: If images, audio or subtitles are not detected.
    """
    check_folder_exists(image_folder)
    check_folder_exists(audio_folder)
    check_file_exists(script_path)
    check_file_exists(font_path)

    images = get_files(image_folder, ('.jpg', '.png'))
    audio_files = get_files(audio_folder, ('.mp3', '.wav'))
    subtitles = json_extract(script_path)
    raw_clips = []
    audio_durations = []
    start_duration = 0

    # Create the intro clip and append it to the raw clips
    path_to_background = "resources/Intro/intro.jpg"
    check_file_exists(path_to_background)
    check_file_exists(font_path)
    topic = extract_topic_from_json(script_path)
    intro_clip = create_intro_clip(path_to_background, duration=5, topic=topic, font_path=font_path)
    raw_clips.append(intro_clip)

    # Create the individual clips with audio
    for i, (img, audio) in enumerate(zip(images, audio_files), start=1):
        audio_clip = AudioFileClip(audio)
        image_clip = ImageClip(img).with_duration(audio_clip.duration).with_audio(audio_clip)
        # Debug text for subtitle synchronisation:
        # print(f"Start : {start_duration}")
        # print(f"End : {audio_clip.duration + start_duration}")
        audio_durations.append(audio_clip.duration)
        print(f"Video clip no. {i} successfully created")
        start_duration += audio_clip.duration
        image_clip = add_effects(image_clip)
        raw_clips.append(image_clip)

    # Create the outro clip and append it to the raw clips
    outro_text = "Thank you for watching! Made by ForgeTube team."
    outro_clip = create_intro_clip(path_to_background, duration=5, topic=outro_text, font_path=font_path)
    raw_clips.append(outro_clip)
    # Store individual clips without subtitles for preview / debug:
    # clip = CompositeVideoClip(img)
    # clip.write_videofile(f"samples/raw/{raw_clips.index(image_clip)+1}.mp4", fps=1, threads=os.cpu_count())

    video = concatenate_videoclips(raw_clips, method="compose")

    '''
    The following part of the code addresses the issues listed below:
    FIXME 1: Subtitles are not properly synchronised with the audio.
    FIX: Each subtitle text is paired with the corresponding audio. The duration of the text is kept the same as the duration of the audio.
    FIXME 2: If the entire text is shown at once, it doesn't fit on screen.
    FIX: Allows a maximum of 10 words to be shown at once; the rest of the text is divided into chunks, each chunk shown for a
    proportional duration, where duration of the chunk = total duration of the audio * (chunk word count / total number of words).
    WARNING: Due to rounding and floating-point division errors, some chunks are not perfectly synchronised.
    FIXME 3: Subtitles do not appear at the right position in the video. The preferred position is vertical: bottom, horizontal: center.
    FIX: `SubtitleClip` was causing problems, so `TextClip` is used instead.
    FIXME 4: When subtitles were added to each clip one by one and all the clips were concatenated later, an error occurred if the
    images were of different dimensions, where the aspect ratio of the final video was messed up.
    FIX: Concatenation is done only on the image clips; the composite video clip is created afterwards, with the subtitle clips
    overlaid on the concatenated video.
    '''
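    # Worked example of the proportional mapping above (hypothetical numbers):
    # a 25-word segment whose audio lasts 10 s, with chunk_size = 10, splits
    # into chunks of 10, 10 and 5 words, displayed for
    # 10 * (10/25) = 4 s, 10 * (10/25) = 4 s and 10 * (5/25) = 2 s respectively.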
    if with_subtitles:
        start_duration = 5  # offset the subtitles past the 5-second intro clip
        subtitle_clips = []
        chunks = []
        chunk_durations = []
        chunk_size = 10
        for text, duration in zip(subtitles, audio_durations):
            words = text.split()
            if len(words) > chunk_size:
                for i in range(0, len(words), chunk_size):
                    chunk = " ".join(words[i:i + chunk_size])
                    chunks.append(chunk)
                    chunk_duration = duration * (len(chunk.split()) / len(words))
                    chunk_durations.append(chunk_duration)
            else:
                chunks.append(text)
                chunk_durations.append(duration)
        # For debugging:
        # for i in chunks:
        #     print(f"Index: {chunks.index(i)}, Text: {i}, Word Count: {len(i.split())}")
        # print(chunk_durations)
        for subtitle, duration in zip(chunks, chunk_durations):
            subtitle_clip = TextClip(text=subtitle,
                                     font=font_path,
                                     color='white',
                                     bg_color='black',
                                     size=(1000, 100),
                                     method='caption',
                                     text_align="center",
                                     horizontal_align="center"
                                     ).with_duration(duration).with_start(start_duration).with_position('bottom')
            subtitle_clips.append(subtitle_clip)
            # For debugging:
            # print(f"Subtitle clip no. {chunks.index(subtitle)+1} successfully created")
            start_duration += duration
        # Overlay the subtitle clips on the concatenated video
        subtitle_clips.insert(0, video)
        final_video = CompositeVideoClip(subtitle_clips)
    else:
        final_video = video
    final_video.write_videofile(output_file, fps=24, threads=os.cpu_count())
    print(f"Video created successfully: {output_file}")

    # except FileNotFoundError:
    #     if not images:
    #         raise FileNotFoundError("No images found in the specified folder.")
    #     if not audio_files:
    #         raise FileNotFoundError("No audio files found in the specified folder.")
    #     if not subtitles:
    #         raise FileNotFoundError("No subtitles found in the specified json.")
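
# A minimal driver sketch for create_video (hypothetical asset locations,
# mirroring the commented-out driver at the bottom of this file):
#
#   create_video(image_folder="resources/images/",
#                audio_folder="resources/Audio/",
#                script_path="resources/scripts/script.json",
#                font_path="resources/font/font.ttf",
#                output_file="output.mp4",
#                with_subtitles=True)
#
# For reference, create_complete_srt below emits standard SubRip entries of
# the form (hypothetical timings):
#
#   1
#   00:00:05,000 --> 00:00:09,000
#   Welcome to the video and thanks for watching
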
393 | """ 394 | 395 | script = json_extract(script_folder) 396 | audio_files = get_files(audio_file_folder,(".wav",".mp3")) 397 | audio_clips = [] 398 | [audio_clips.append(AudioFileClip(x)) for x in audio_files] 399 | subs = pysrt.SubRipFile() 400 | start_time = 5 401 | chunk = '' 402 | chunk_duration = 0 403 | end_time = 5 404 | n = 1 405 | for text,audio_clip in zip(script,audio_clips): 406 | duration = audio_clip.duration 407 | words = text.split() 408 | if len(words) > chunk_size: 409 | for i in range(0,len(words),chunk_size): 410 | chunk = " ".join(words[i : (i+chunk_size if i < len(words)-1 else len(words)-1)]) 411 | chunk_duration = duration * (len(chunk.split())/len(words)) 412 | end_time += chunk_duration 413 | subtitle = pysrt.SubRipItem( 414 | index=n, 415 | start=pysrt.SubRipTime(seconds=start_time), 416 | end=pysrt.SubRipTime(seconds=end_time), 417 | text=chunk 418 | ) 419 | subs.append(subtitle) 420 | # For Debugging: 421 | # print(f"Subtitle no. {n} added successfully.") 422 | # print(f"Start : {start_time}") 423 | # print(f"End : {end_time}") 424 | start_time = end_time 425 | n+=1 426 | else: 427 | chunk = text 428 | chunk_duration = duration 429 | end_time += chunk_duration 430 | subtitle = pysrt.SubRipItem( 431 | index=len(subs) + 1, 432 | start=pysrt.SubRipTime(seconds=start_time), 433 | end=pysrt.SubRipTime(seconds=end_time), 434 | text=chunk 435 | ) 436 | subs.append(subtitle) 437 | # For Debugging: 438 | # print(f"Subtitle no. {n} added successfully.") 439 | # print(f"Start : {start_time}") 440 | # print(f"End : {end_time}") 441 | start_time = end_time 442 | n+=1 443 | 444 | subs.save(outfile_path) 445 | print(f"File saved successfully at {outfile_path}") 446 | 447 | 448 | # if __name__ == "__main__": 449 | # image_folder = "resources/images/" 450 | # audio_folder = "resources/Audio/" 451 | # script_path = "resources/scripts/script.json" 452 | # font_path = "resources/font/font.ttf" 453 | # sub_output_file = "The.srt" 454 | # topic = extract_topic_from_json(script_path) 455 | # output_file = f"The.mp4" 456 | 457 | # create_complete_srt(script_folder=script_path, 458 | # audio_file_folder=audio_folder, 459 | # outfile_path=sub_output_file, 460 | # chunk_size = 10) 461 | 462 | # create_video(image_folder, audio_folder,script_path,font_path, output_file,with_subtitles=True) 463 | 464 | --------------------------------------------------------------------------------