├── .env.template ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── agents ├── .env.template ├── .gitignore ├── __init__.py ├── killer_video_idea.py ├── killer_video_title_gen.py ├── persona_testing.py ├── requirements.txt ├── utils.py └── videos_to_compare.json ├── config.json ├── config └── config.json ├── config_loader.py ├── main.py ├── operations ├── __init__.py ├── avatar_video_generation.py ├── denoise.py ├── save.py ├── set_orientation.py ├── shorts.py ├── subtitles.py ├── transcript.py ├── translation.py └── trim.py ├── recipes.py ├── requirements.txt └── utils ├── __init__.py └── utils.py /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | OPENAI_MODEL=o4-mini 3 | OPENAI_API_BASE=https://api.openai.com/v1 4 | WHISPER_MODEL_SIZE=turbo -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.wav 2 | *.mp4 3 | *.srt 4 | *.mp3 5 | *.jpe?g 6 | *.png 7 | .DS_Store 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # poetry 106 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 107 | # This is especially recommended for binary packages to ensure reproducibility, and is more 108 | # commonly ignored for libraries. 
109 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 110 | #poetry.lock 111 | 112 | # pdm 113 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 114 | #pdm.lock 115 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 116 | # in version control. 117 | # https://pdm.fming.dev/#use-with-ide 118 | .pdm.toml 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hector Pulido 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Youtubers toolkit 2 | 3 | This project is a comprehensive command-line toolkit designed for video editing and processing tasks. It provides a modular framework to perform operations such as trimming by silence, adding subtitles, denoising audio, generating transcripts and translations, and even creating short videos with dynamic titles. 
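For a quick taste of the pipeline model, a single command can chain several of these operations (a minimal sketch with a placeholder file name; the full set of pipeline functions and flags is documented under Usage below):

```bash
python main.py video_edit my_clip.mp4 --pipeline trim_by_silence subtitles
```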
4 | 5 | ## Features 6 | 7 | - **Configuration Loader:** Loads settings from a JSON file (`config.json`) to configure various operations. 8 | - **Video Editing Pipeline:** Supports a sequence of operations (e.g., trim by silence, add subtitles, set orientation) to edit videos. 9 | - **Audio Processing:** Extracts, separates, and denoises audio from video files. 10 | - **Voice Operations:** Utilizes AI models (e.g., Whisper, Bark) for video translation and audio generation. 11 | - **Short Video Generation:** Creates a video base with a blurred background and overlays content or titles. 12 | - **Utility Functions:** Includes tools to concatenate source files and handle subtitle splitting. 13 | 14 | ## Installation 15 | 16 | ### Prerequisites 17 | 18 | - **Python 3.10+** 19 | - Required libraries (installable via `pip`): 20 | - `moviepy` 21 | - `numpy` 22 | - `scipy` 23 | - `whisper` 24 | - `torch` 25 | - `torchaudio` 26 | - `pydub` 27 | - `librosa` 28 | - `bark` (for audio generation) 29 | - _...and any additional dependencies as noted in individual modules._ 30 | 31 | ### Setup 32 | 33 | 1. Clone the repository: 34 | ```bash 35 | git clone 36 | cd 37 | ``` 38 | 39 | 2. Create and activate a virtual environment (recommended): 40 | ```bash 41 | python -m venv venv 42 | source venv/bin/activate # On Windows: venv\Scripts\activate 43 | ``` 44 | 45 | 3. Install the required packages: 46 | ```bash 47 | pip install -r requirements.txt 48 | ``` 49 | *(Ensure your `requirements.txt` lists all necessary dependencies.)* 50 | 51 | 4. Configure the project by editing the `config.json` file to adjust settings such as subtitle and title clip configurations. 52 | 53 | ## Usage 54 | 55 | The main entry point is `main.py`, which provides several subcommands for different tasks. Run the following command to see available options: 56 | 57 | ```bash 58 | python main.py --help 59 | ``` 60 | 61 | ## Subcommands Overview 62 | 63 | The CLI tool uses subcommands to select the desired functionality. You invoke the tool using one of the following subcommands: 64 | 65 | 1. **video_edit** – For processing video files with a customizable pipeline of functions. 66 | 2. **separate_audio** – To extract audio from video files. 67 | 3. **split_str** – To split SRT subtitle files based on a specified number of words per subtitle. 68 | 4. **voice** – To perform voice operations such as video translation or audio generation. 69 | 5. **generator** – To generate a base video or add titles for short video production. 70 | 71 | Each subcommand has its own required and optional arguments. Use the `--help` flag with any subcommand to see detailed usage information. 72 | 73 | --- 74 | 75 | ## 1. Video Editing (`video_edit`) 76 | 77 | **Description:** 78 | This subcommand processes video files by applying a sequence (pipeline) of editing functions. 79 | 80 | ### Usage 81 | ```bash 82 | python main.py video_edit [ ...] --pipeline [ ...] [options] 83 | ``` 84 | 85 | ### Arguments 86 | 87 | - **input_file** (required): 88 | One or more video file paths to process. 89 | 90 | - **--pipeline** (required): 91 | List of functions to apply to each video file. 92 | **Available functions:** 93 | - `trim_by_silence` 94 | - `denoise` 95 | - `transcript` 96 | - `subtitles` 97 | - `save_separated_video` 98 | - `save_join` 99 | - `save_video` 100 | - `set_vertical` 101 | - `set_horizontal` 102 | 103 | ### Options 104 | 105 | - **-c, --clip_interval**: 106 | *Type:* float, *Default:* 2 107 | *Description:* Clipping precision. 
108 | 109 | - **-s, --sound_threshold**: 110 | *Type:* float, *Default:* 0.01 111 | *Description:* Maximum volume threshold to consider silence. 112 | 113 | - **-d, --discard_silence**: 114 | *Type:* boolean flag (uses a string-to-boolean converter), *Default:* False 115 | *Description:* Discard silent clips. 116 | 117 | ### Example 118 | ```bash 119 | python main.py video_edit video1.mp4 video2.mp4 --pipeline trim_by_silence subtitles -c 3 -s 0.02 -d True 120 | ``` 121 | *This applies the `trim_by_silence` and `subtitles` functions to `video1.mp4` and `video2.mp4` with a clip interval of 3 seconds and a sound threshold of 0.02, discarding silent clips.* 122 | 123 | --- 124 | 125 | ## 2. Separate Audio (`separate_audio`) 126 | 127 | **Description:** 128 | Extracts audio from the given video files. 129 | 130 | ### Usage 131 | ```bash 132 | python main.py separate_audio [ ...] 133 | ``` 134 | 135 | ### Arguments 136 | 137 | - **files** (required): 138 | One or more video files from which to extract audio. 139 | 140 | ### Example 141 | ```bash 142 | python main.py separate_audio video1.mp4 video2.mp4 143 | ``` 144 | *This command will extract the audio from `video1.mp4` and `video2.mp4` and save them accordingly.* 145 | 146 | --- 147 | 148 | ## 3. Split SRT (`split_str`) 149 | 150 | **Description:** 151 | Splits SRT subtitle files into smaller segments based on a specified number of words per subtitle. 152 | 153 | ### Usage 154 | ```bash 155 | python main.py split_str [ ...] 156 | ``` 157 | 158 | ### Arguments 159 | 160 | - **files** (required): 161 | One or more SRT files to split. 162 | 163 | - **words_per_subtitle** (required): 164 | *Type:* integer 165 | *Description:* The number of words per subtitle segment. 166 | 167 | ### Example 168 | ```bash 169 | python main.py split_str subtitles.srt 5 170 | ``` 171 | *This will split the subtitles in `subtitles.srt` so that each subtitle contains approximately 5 words.* 172 | 173 | --- 174 | 175 | ## 4. Voice Operations (`voice`) 176 | 177 | **Description:** 178 | Performs voice operations such as video translation or audio generation. 179 | 180 | ### Usage 181 | ```bash 182 | python main.py voice [options] 183 | ``` 184 | 185 | ### Arguments 186 | 187 | - **operation** (required): 188 | Operation to perform. Choose between: 189 | - `video_translation` 190 | - `audio_generator` 191 | 192 | - **video_path** (required): 193 | The path to the video file to process. 194 | 195 | ### Options 196 | 197 | - **-t, --translate**: 198 | *Type:* Model to use for translation, *Default:* `"Helsinki-NLP/opus-mt-es-en"` 199 | *Description:* Translate the video to English or transcribe in the same language. 200 | 201 | - **--voice**: 202 | *Type:* string, *Default:* `"en-us/af_heart"` 203 | *Description:* Voice model to use for translation. 204 | 205 | ### Example 206 | ```bash 207 | python main.py voice video_translation video1.mp4 -t Helsinki-NLP/opus-mt-es-en --voice en-us/af_heart 208 | ``` 209 | *This translates `video1.mp4` using the specified voice model, with translation turned off (if you only want transcription).* 210 | 211 | --- 212 | 213 | ## 5. Short Video Generator (`generator`) 214 | 215 | **Description:** 216 | Generates a base video or adds titles to a short video. 217 | 218 | ### Usage 219 | ```bash 220 | python main.py generator [ ...] 221 | ``` 222 | 223 | ### Arguments 224 | 225 | - **files** (required): 226 | One or more files to process. 227 | 228 | - **tool** (required): 229 | The tool to use. 
Available options: 230 | - `base` – to generate a base video. 231 | - `add_titles` – to add titles to the video. 232 | 233 | ### Example 234 | ```bash 235 | python main.py generator video1.mp4 base 236 | ``` 237 | *This command uses the `base` tool on `video1.mp4` to generate a base video.* 238 | 239 | --- 240 | 241 | ## General Help 242 | 243 | To display the help information for the CLI tool or a specific subcommand, use the `--help` flag. For example: 244 | ```bash 245 | python main.py --help 246 | python main.py video_edit --help 247 | ``` 248 | 249 | This will display all available options and arguments for that command. 250 | 251 | 252 | ## Project Structure 253 | 254 | - **config_loader.py:** Loads configuration from `config.json` and makes it available throughout the project. 255 | - **main.py:** The central entry point that defines and handles multiple subcommands for video processing. 256 | - **automatic_short_generator.py:** A script to generate short videos using predefined tools. 257 | - **get_data.py:** A utility to traverse directories and concatenate files. 258 | - **utils/** 259 | - **utils.py:** Contains helper functions (e.g., converting strings to booleans, audio extraction, video metadata extraction). 260 | - **operations/** 261 | - **save.py:** Functions to save edited or joined video clips. 262 | - **set_orientation.py:** Adjusts video orientation (vertical/horizontal). 263 | - **subtitles.py:** Adds subtitles to videos. 264 | - **shorts.py:** Generates base videos with effects (e.g., blurred background) and adds title clips. 265 | - **transcript.py:** Generates transcripts using the Whisper model. 266 | - **trim.py:** Implements silence detection and video trimming. 267 | - **translation.py:** Handles video translation and audio generation. 268 | - **denoise.py:** Applies denoising filters using deep learning models. 269 | 270 | ## Configuration 271 | 272 | The toolkit uses a JSON configuration file (`config.json`) to define parameters such as: 273 | - Subtitle and title clip settings (e.g., font, size, position). 274 | - Other customizable options for processing operations. 275 | 276 | Adjust these settings according to your needs before running any commands. 277 | 278 | ## Contributing 279 | 280 | Contributions are welcome! If you have suggestions or improvements, feel free to open an issue or submit a pull request. 281 | 282 | ## License 283 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 284 | 285 | 286 |
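For reference, the settings described in the Configuration section above follow the shape of the `config.json` shipped with this repository (an abridged excerpt; the complete file appears later in this listing):

```json
{
    "subtitles_clip_config": { "font": "Hey-Comic", "fontsize": 60, "color": "white" },
    "subtitles_position": { "text_position_y_offset": -500, "text_position_x_offset": 0 },
    "titles": ["", "Video completo en la descripcion.", "Suscribete para mas."]
}
```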
287 | ## Let's connect 😋
288 |
289 |
290 | 291 | Hector's LinkedIn     292 | 293 | Hector's Twitter     294 | 295 | Hector's Twitch     296 | 297 | Hector's Youtube     298 | 299 | Pequesoft website     300 | 301 | 302 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HectorPulido/Youtubers-toolkit/a99be24edc2bef6d02039cd0b87f93bc61efe680/__init__.py -------------------------------------------------------------------------------- /agents/.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | OPENAI_MODEL=o4-mini 3 | OPENAI_API_BASE=https://api.openai.com/v1 -------------------------------------------------------------------------------- /agents/.gitignore: -------------------------------------------------------------------------------- 1 | *.env 2 | ignore_* 3 | *.ipynb 4 | video_transcription.txt -------------------------------------------------------------------------------- /agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HectorPulido/Youtubers-toolkit/a99be24edc2bef6d02039cd0b87f93bc61efe680/agents/__init__.py -------------------------------------------------------------------------------- /agents/killer_video_idea.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates unique and engaging YouTube video ideas for a specific channel 3 | """ 4 | 5 | import os 6 | import sys 7 | from concurrent.futures import ThreadPoolExecutor, as_completed 8 | 9 | from dotenv import load_dotenv 10 | from openai import OpenAI 11 | 12 | try: 13 | from .utils import get_youtube_data, try_to_load_json 14 | from .persona_testing import PersonaTester 15 | except ImportError: 16 | from utils import get_youtube_data, try_to_load_json 17 | from persona_testing import PersonaTester 18 | 19 | load_dotenv() 20 | 21 | MODEL = os.getenv("MODEL", "o3-mini") 22 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 23 | OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1") 24 | 25 | _client = OpenAI( 26 | api_key=OPENAI_API_KEY, 27 | base_url=OPENAI_API_BASE, 28 | ) 29 | 30 | WHATS_AN_IDEA = """ 31 | Concepts examples 32 | ❌ THOSE ARE EXAMPLES, You should NOT use them, apply the concept to the user style: 33 | 34 | A video idea is NOT a topic IS THE HOOK. 35 | 36 | Topic examples: 37 | > How to cook eggs 38 | > How to bake cookies 39 | > Best kitchen appliances in 2025 40 | > How to cook like a michelin star chef 41 | 42 | Angles of "How to cook like a michelin star chef": 43 | > How does a michelin star chef cook 200 meals per night? 44 | > The process, start to finish, of a michelin star dish 45 | 46 | Hook of "The process, start to finish, of a michelin star dish": 47 | > Breaking down the process of a michelin star dish from menu to ingredients to cook to table 48 | 49 | A good idea should: 50 | 1. Create a sense of urgency and a need to watch the video NOW. 51 | 2. Spark curiosity and make the viewer want to learn more. 52 | 3. Open a loop of information. 53 | """ 54 | 55 | 56 | def chain_summarize_style(videos_string: str) -> str: 57 | """ 58 | Take the description (videos_string) of the existing YouTuber videos 59 | and generate a brief summary about their style. 
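    Returns the model's plain-text summary, which also notes the language of the videos.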
60 | """ 61 | 62 | prompt = f""" 63 | You are an AI assistant that analyzes the given YouTuber's content description 64 | and writes a concise overview of their style, common themes, and audience expectations. 65 | 66 | YouTuber videos description: 67 | 68 | {videos_string} 69 | 70 | 71 | TASK: 72 | Summarize the main themes, style, and audience interest found in the videos described above. 73 | Add also the language of the videos. 74 | """ 75 | response = _client.chat.completions.create( 76 | model=MODEL, 77 | messages=[{"role": "user", "content": prompt}], 78 | ) 79 | return response.choices[0].message.content.strip() 80 | 81 | 82 | def chain_generate_ideas(summarized_style: str, num_ideas: int = 15) -> list: 83 | """ 84 | Create a list of video ideas based on the summarized style of the YouTuber. 85 | """ 86 | prompt = f""" 87 | You are an AI specialized in brainstorming extraordinary YouTube video ideas. 88 | 89 | SUMMARY OF THE YOUTUBER'S STYLE: 90 | 91 | {summarized_style} 92 | 93 | 94 | {WHATS_AN_IDEA} 95 | 96 | TASK: 97 | 1. Propose at least {num_ideas} different video ideas (Topic -> Angle -> Hook). 98 | 2. Make sure these ideas align with the YouTuber's direction and style. 99 | 3. Keep them creative and unique don't use the same ideas as the examples. 100 | 4. Use the same language as the YouTuber's style. 101 | 5. Focus on doable ideas, not impossible ones. 102 | 103 | Return your response in a JSON format like this, do not return anything else: 104 | [ 105 | {{ 106 | "Topic": "How to cook like a michelin star chef", 107 | "Angle": "How does a michelin star chef cook 200 meals per night?", 108 | "Hook": "Breaking down the process of a michelin star dish from menu to ingredients to cook to table" 109 | }}, 110 | {{ 111 | ... 112 | }} 113 | ] 114 | """ 115 | response = _client.chat.completions.create( 116 | model=MODEL, 117 | messages=[{"role": "user", "content": prompt}], 118 | ) 119 | 120 | response_final = response.choices[0].message.content.strip() 121 | return try_to_load_json(_client, MODEL, response_final) 122 | 123 | 124 | def chain_criticize_and_refine(ideas: dict, testing: dict) -> dict: 125 | """ 126 | This chain "critique" and refines the generated list of ideas. 127 | """ 128 | 129 | prompt = f""" 130 | You are an AI harsh critic that wants to refine the video ideas to ensure 131 | they are as unique and engaging as possible. 132 | 133 | {WHATS_AN_IDEA} 134 | 135 | BELOW ARE THE VIDEO IDEA (Topic -> Angle -> Hook): 136 | 137 | {ideas} 138 | 139 | 140 | AND BELOW is how the ideas performed on the testing, a good testing is >50%: 141 | 142 | {testing} 143 | 144 | 145 | TASK: 146 | 1. Critique each idea concisely, pointing out any weak or cliché aspects. 147 | 2. Suggest a refined or improved version if necessary. 148 | 3. The goal is to transform each idea into a truly outstanding, fresh concept. 149 | 4. Use the same language as the ideas. 150 | 5. Make sure the ideas are doable, not impossible ones. 151 | 152 | Return the refined ideas, maintaining the (Topic, Angle, Hook) format but incorporating your improvements. 153 | 154 | And add if not existing a WOW FACTOR to each idea. 
155 | 156 | Return your response in a JSON format like this, do not return anything else: 157 | {{ 158 | "Feedback": "This is a great idea, but it could be improved by...", 159 | "Topic": "How to cook like a michelin star chef", 160 | "Angle": "How does a michelin star chef cook 200 meals per night?", 161 | "Hook": "Breaking down the process of a michelin star dish from menu to ingredients to cook to table", 162 | "WOW": "WOW, Do I really will know how to cook like a michelin star chef after this video??" 163 | }} 164 | """ 165 | response = _client.chat.completions.create( 166 | model=MODEL, 167 | messages=[{"role": "user", "content": prompt}], 168 | ) 169 | response_final = response.choices[0].message.content.strip() 170 | return try_to_load_json(_client, MODEL, response_final) 171 | 172 | 173 | def iterative_idea_generator( 174 | videos_string: str, 175 | iterations: int = 2, 176 | num_initial_ideas: int = 100, 177 | persona: PersonaTester = None, 178 | ) -> str: 179 | """ 180 | 1. Summary of the style of the channel 181 | 2. Generate several initial ideas 182 | 3. Test the video ideas using the personas 183 | 4. Remove the ideas with a testing result <10% 184 | 5. Critique and refine the ideas 185 | 6. Repeat the process N times 186 | 7. Return the final ideas 187 | """ 188 | 189 | # 1. Resumen de estilo 190 | style_summary = chain_summarize_style(videos_string) 191 | print(f"\n--- Style Summary ---\n{style_summary}") 192 | 193 | # 2. Generar ideas iniciales 194 | ideas_current = chain_generate_ideas(style_summary, num_ideas=num_initial_ideas) 195 | print(f"\n--- Initial Ideas ({len(ideas_current)}) ---\n{ideas_current}") 196 | 197 | for i in range(1, iterations + 1): 198 | # Extraer sólo los títulos 199 | idea_topics = [idea["Topic"] for idea in ideas_current] 200 | 201 | # 3. Testeo en lote (una sola llamada, devuelve lista ordenada según idea_topics) 202 | testing_results = persona.test_multiples_videos(titles=idea_topics, checks=20) 203 | print(f"\n--- Testing Results ({len(testing_results)}) ---\n{testing_results}") 204 | 205 | # 4. Filtrar ideas con percentage < 10% 206 | kept = [ 207 | (idea, result) 208 | for idea, result in zip(ideas_current, testing_results) 209 | if result["percentage"] >= 10 210 | ] 211 | ideas_filtered, results_filtered = zip(*kept) if kept else ([], []) 212 | ideas_current = list(ideas_filtered) 213 | testing_results = list(results_filtered) 214 | 215 | # 5. 
Paralelizar crítica y refinamiento 216 | refined = [None] * len(ideas_current) 217 | with ThreadPoolExecutor(max_workers=len(ideas_current) or 1) as executor: 218 | # Lanzar tareas con su índice 219 | future_to_idx = { 220 | executor.submit(chain_criticize_and_refine, idea, result): idx 221 | for idx, (idea, result) in enumerate( 222 | zip(ideas_current, testing_results) 223 | ) 224 | } 225 | for future in as_completed(future_to_idx): 226 | idx = future_to_idx[future] 227 | idea_refined = future.result() 228 | # Actualizar porcentaje y limpiar feedback 229 | idea_refined["Likeness_percentage"] = testing_results[idx]["percentage"] 230 | idea_refined.pop("Feedback", None) 231 | refined[idx] = idea_refined 232 | 233 | ideas_current = refined 234 | print( 235 | f"\n--- Iteration {i}: Critiquing and Refining Ideas ({len(ideas_current)}) ---" 236 | ) 237 | 238 | return ideas_current 239 | 240 | 241 | if __name__ == "__main__": 242 | if len(sys.argv) < 2 or len(sys.argv) > 2: 243 | print("Usage: python killer_video_idea.py ") 244 | sys.exit(1) 245 | 246 | channel_name = sys.argv[1] 247 | channel_data = get_youtube_data(channel_name) 248 | if not channel_data or len(channel_data) < 1: 249 | print("Error: Unable to retrieve channel data.") 250 | sys.exit(1) 251 | 252 | channel_data = channel_data[:25] 253 | 254 | persona_tester = PersonaTester( 255 | model=MODEL, 256 | client=_client, 257 | comparation_path="videos_to_compare.json", 258 | ) 259 | 260 | _final_ideas = iterative_idea_generator( 261 | videos_string=channel_data, 262 | iterations=3, 263 | num_initial_ideas=25, 264 | persona=persona_tester, 265 | ) 266 | 267 | print("\n=== 10 FINAL VIDEO IDEAS (TOPIC -> ANGLE -> HOOK) ===") 268 | print(_final_ideas) 269 | 270 | with open("ignore_video_ideas.txt", "w", encoding="utf-8") as file: 271 | file.write(_final_ideas) 272 | -------------------------------------------------------------------------------- /agents/killer_video_title_gen.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates killer video titles for YouTube videos using OpenAI's API. 3 | """ 4 | 5 | import os 6 | import json 7 | 8 | from dotenv import load_dotenv 9 | from openai import OpenAI 10 | 11 | try: 12 | from .utils import try_to_load_json 13 | from .persona_testing import PersonaTester 14 | except ImportError: 15 | from utils import try_to_load_json 16 | from persona_testing import PersonaTester 17 | 18 | load_dotenv() 19 | MODEL = os.getenv("MODEL", "o3-mini") 20 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 21 | OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1") 22 | _client = OpenAI( 23 | api_key=OPENAI_API_KEY, 24 | base_url=OPENAI_API_BASE, 25 | ) 26 | 27 | 28 | def gen_video_titles(videos_string: str, num_of_titles: int) -> str: 29 | """ 30 | Take the description (videos_string) of the existing YouTuber videos 31 | and generate a brief summary about their style. 32 | """ 33 | prompt = f""" 34 | You are a World wide known Youtuber, even bigger than MR Beast, and you are a master of creating killer video titles. 35 | Your objective is to generate {str(num_of_titles)} perfects videos titles for this video. 36 | Here are some guidelines for title generation 37 | 38 | Especially at higher levels of YouTube, they stress me out all the time. But I was reviewing some videos shared to this subreddit and I noticed that the vast majority of small creators just completely have the wrong ideas when it comes to titles. 
And this isn't a roast, it's hard to know where to start with titles, especially if you have no guidance. So here's some guidance. 39 | I originally shared this advice to someone individually but I thought it could help the rest of the community. 40 | Titles should be short. Generally 50 characters or less is a safe bet. Mostly this is because in some places titles longer than this will be cut off and the viewer won't see the full title. But also, short titles are easier to see and immediately understand. And if you can come up with a way to describe your video in 50 characters or less, that probably means it's a strong idea. (and the other way around. If you can't think of a short, simple title for your video, that potentially means your idea is not that clear/strong). 41 | Titles should be not only short, but also simple. They should use simple language that people of all ages and education levels can understand. Anytime your video is shown, it's competing with hundreds of other videos on the screen at any given time. You want a bold, easy to understand title that will catch someone's eyes. Overly long, complex titles don't stand out, they are easy to glaze over because we actually have to "think" about what they say, and no one wants to do that. Consider these two titles. 42 | A. One Hit Kills? Full Overwatch Review and Gameplay | Sam the Gamer 43 | B. Overwatch is a Terrible Game 44 | One of these requires significantly more effort to read and understand than the other. We want to be more like the second one. Not only is it easier to read, it much more clearly communicates what to expect from the video. 45 | Title should not be vague**.** This is a big one I've been seeing a lot. People understand that you have to generate curiosity in your titles so they make them super vague. But vagueness causes confusion, not curiosity. Consider these two title: 46 | A. Before You Start Cycling, Do This 47 | B. Everything You Need to Know as a New Cyclist 48 | Title A is okay, but it's a bit unclear what to expect from the video. Title B however is extremely clear in showing us what the video is about. 49 | An easy way to think of your title is "A simple statement that, alongside my thumbnail, shows why my video is worth watching". What is special about your video? Why should people watch it? What is contained in the video that people need to see? The answer to that should most likely be in the title (and thumbnail). Sometimes the idea itself is so interesting, you can just put the premise of the video as the title. Like if you have a video about climbing mount everest, you could probably just title the video "I Climbed Mount Everest" because that's interesting alone. Most top YouTubers have figured out how to do this every video. 50 | If you're doing a Overwatch gameplay, you probably wont get way with "Overwatch Gameplay" as the title because that's not that interesting. So then the question is, what IS interesting about the video? As implied before, this is a good way to know if you actually have a good video idea--if you can't figure out what's interesting you might not. So maybe Sam the Gamer became the top ranked player in Overwatch - so he made a video about it -- THAT'S interesting. So maybe a good title is "I Got Rank 1 in Overwatch" 51 | Im sure you've seen successful videos that break these rules and any other titles advice you've received, but that's how it goes. Titles are just one part of the picture. Sometimes the other parts are so strong it makes up for it. 
52 | 53 | More info: 54 | YouTube isn’t just another social media platform like TikTok or Instagram. It is a search engine, like Google, specifically for video content. 55 | This means people actively search for content they’re interested in, and your YouTube title is the gateway to users discovering your videos. 56 | A YouTube title greatly influences search engine optimization (SEO) and video ranking. 57 | YouTube’s algorithm scans your titles for specific keywords and phrases and bumps your video higher in search results because of it. 58 | How To Write YouTube Video Titles In 7 Easy Steps 59 | Creating an effective YouTube Video Title isn’t always an easy task. There’s a lot of strategy and technique involved. 60 | Follow these 7 easy steps to create a YouTube video title that increases views. 61 | Use Simple And Clear Language 62 | Create a title using elementary language that is relatively conversational.. Simplify your title to a 5th-grade reading level, making it easier to read and understand. 63 | Simple YouTube video description from Finn Whitaker 64 | YouTube 65 | While we don’t necessarily recommend using one word to describe your video like this YouTube creator. Sometimes all you need is a short and concise title to get your point across. 66 | The only exception: know your audience. If you are creating a video for a niche community that uses more advanced language, then you can use a more specific video title. 67 | Optimize Title Length And Format 68 | YouTube limits titles to 100 characters; however, long titles get cut off at a certain point. 69 | Keep your YouTube video title between 60 and 70 characters for full visibility on both desktop and mobile. 70 | Alex Armitage grabbing audience's attention with his YouTube video title 71 | YouTube 72 | The words up front grab the viewer’s attention. People have a low attention span, so get your point across at the beginning of the title. 73 | In this example, the second title, “Grocery Shopping Tips – Daily Vlog #30,” is better because it puts the main keywords first, making the content clearer and more attractive to viewers. 74 | Use title modifiers such as “How-to,” “Top 10,” or “Ultimate Guide” to gain more viewership. These are small things that can go a long way. 75 | YouTube Video Title Ideas: using "Top 15," strategy to gain more viewership 76 | Youtube 77 | Be Honest And Direct 78 | Avoid clickbait titles. Don’t trick viewers into watching the video. 79 | If your content isn’t giving them what they want, then they will click off your video immediately. 80 | As a result, your average watch time will decrease, and the algorithm will show your video to fewer people. 81 | On the other hand, if your average watch time increases, so will your views. So, be honest about the content of your video in your title, even if you get fewer views right away. 82 | Get Inspiration From Competitors 83 | Search your topic on YouTube and scroll through the top videos. 84 | Look for videos with lots of views but not many subscribers. This means the video is doing well, and YouTube is promoting this video. 85 | YouTube Video Title Ideas: get inspiration from successful vidoes 86 | YouTube 87 | Get inspiration from these videos. Look at the wording and structure of their titles, and incorporate them into your video titles. 
88 | Conduct Keyword Research 89 | Keyword research 90 | The process of identifying words and phrases that rank well in search engines such as Google and YouTube 91 | Keywords should be included in not only your video title, but also the description, thumbnail, and tags. 92 | “top travel destinations 2024” YouTube search, Ryan Shirley's video is one of the top results 93 | YouTube 94 | When searching for “top travel destinations 2024”, this creator’s video is one of the top results. This is partly because he uses keywords like “2024”, “travel”, and “travel guide”. 95 | However, finding the right keywords can be difficult. 96 | Use keyword research tools like Ahrefs. This software suggests relevant keywords in your title to help it rank higher in search results. 97 | Ahrefs - keyword research tool 98 | Ahrefs 99 | Try our new YouTube Channel Keyword Generator today! 100 | Add Hashtags To Your Video Titles 101 | Hashtags allow YouTube to understand your content and target a specific audience. 102 | If YouTube understands your content it will categorize it and show it to an audience that is interested in similar content. 103 | Hashtags will also allow viewers to find these videos on the hashtag page. 104 | YouTube Video Title Ideas: adding Hashtags To Your Video Titles 105 | YouTube 106 | How do I add hashtags to my YouTube videos? 107 | At the bottom of your video’s description, add the # symbol and start typing words or keywords related to your topic. YouTube will give hashtag suggestions and choose which ones work best. 108 | Utilize Title Analyzer Tools 109 | Once you create your title, how do you know if it’s effective for YouTube SEO? 110 | Use Headline Studio’s Analyzer Tool to not only give you feedback on SEO rankings but also generate alternative YouTube video titles for you. 111 | Headline Studio’s Analyzer Too 112 | What Is A YouTube Video Title Generator? 113 | What if we told you it was possible to create an effective YouTube title in the blink of an eye? 114 | Headline Studio’s YouTube title generator creates multiple titles at once so that you can choose the best title for your video. one for you. 115 | It analyzes your titles, gives suggestions, and scores its SEO ranking compared to the competition. 
116 | Headline Studio’s Analyzer Too 117 | Recommended reading: 118 | 73 Easy Ways To Write A Headline That Will Reach Your Readers 119 | SEO Headlines: 5 Simple Ways to Rank on SERPs 120 | 50+ Headline Formulas and Templates To Craft A Perfect Headline 121 | 40 YouTube Video Title Ideas 122 | How-To And Tutorial Videos 123 | “How to [Achieve Desired Outcome] in [Specific Time Frame]” 124 | “Step-by-Step Guide: [Process or Skill]” 125 | “Beginner’s Guide to [Topic]: Everything You Need to Know” 126 | “Quick Tips: [Topic] for [Specific Audience]” 127 | “Master [Skill or Tool] with These Simple Steps” 128 | Listicle And Compilation Videos 129 | “Top 10 [Product/Tools/Tips] You Must Try” 130 | “Best [Product/Tools] of [Year]: Our Favorites Reviewed” 131 | “Essential [Topic] Ti¡ps You Should Know” 132 | “Most Amazing [Category] Transformations Caught on Camera” 133 | “Our Favorite [Category] Hacks You’ve Never Seen Before” 134 | Educational And Informative Videos 135 | “The Science Behind [Phenomenon or Trend]” 136 | “Understanding [Complex Topic] in Simple Terms” 137 | “Facts You Didn’t Know About [Topic]” 138 | “What Every [Audience] Should Know About [Topic]” 139 | “Inside Look: [Behind-the-Scenes of Topic or Event]” 140 | Entertainment And Lifestyle Videos 141 | “Ultimate Guide to [Activity or Hobby]” 142 | “Day in the Life of [Personality or Role]” 143 | “Exploring [Location or Trend]: Our Adventure” 144 | “Fun Challenges: [Challenge Name] vs. [Challenge Name]” 145 | “Our Favorite [Category] Reactions and Reviews” 146 | News And Trending Topics 147 | “Breaking News: [Headline or Event] Explained” 148 | “Latest Trends in [Industry or Niche]: What You Need to Know” 149 | “Hot Topic Debate: [Topic] vs. [Opposing View]” 150 | “Exclusive Interview with [Influential Figure or Celebrity]” 151 | “Update: [Current Event] and Its Impact on [Industry or Audience]” 152 | Reviews And Unboxings 153 | “Unboxing the Latest [Product]: First Impressions and Review” 154 | “In-Depth Review of [Gadget/Tech Item]: Is It Worth the Hype?” 155 | “Testing the Top [Brand] Products: Which One Reigns Supreme?” 156 | “Honest Review of [Service/Subscription]: What You Need to Know” 157 | “Unboxing and Review: [Product] vs. [Competitor]” 158 | Vlogs And Personal Experiences 159 | “A Day in My Life: Behind the Scenes of [Event/Activity]” 160 | “Weekly Vlog: How I Balance Work and Life” 161 | “Travel Vlog: Exploring [Destination] with Me” 162 | “My Morning Routine: How I Start My Day Energized” 163 | “Weekend Vlog: [Activity/Adventure] and What I Learned” 164 | YouTube Shorts And Quick Content 165 | “Quick Tips: [Topic] in 60 Seconds” 166 | “Instant Recipe: How to Make [Dish] in 1 Minute” 167 | “Fast Facts: [Interesting Fact or Trivia]” 168 | “One-Minute Challenge: [Fun Activity or Skill]” 169 | “Quick Hacks: Improve Your [Skill/Task] Easily” 170 | A Good Video Title Comes With Practice 171 | There is no guarantee an effective YouTube video title will immediately increase your viewership. However, it will improve your chances of being noticed and attract more viewers over time. 172 | The great thing about creating YouTube video titles is that you can change them at any time. Don’t be afraid to experiment with multiple titles to see what works best. 
173 | 174 | 175 | IMPORTANT: Use always the language of the transcription 176 | 177 | YouTuber videos transcription: 178 | 179 | {videos_string} 180 | 181 | 182 | Return the data on an json list, do this only, do not return anything more 183 | {{ 184 | "My killer video title 1", 185 | "My killer video title 2", 186 | ... 187 | }} 188 | """ 189 | response = _client.chat.completions.create( 190 | model=MODEL, 191 | messages=[{"role": "user", "content": prompt}], 192 | ) 193 | return response.choices[0].message.content.strip() 194 | 195 | 196 | if __name__ == "__main__": 197 | with open("video_transcription.txt", "r", encoding="utf-8") as file: 198 | video_transcription = file.read() 199 | titles = gen_video_titles(video_transcription, 25) 200 | titles = try_to_load_json(_client, MODEL, titles) 201 | print("\n=== 10 INITIAL VIDEO TITLES ===") 202 | print(titles) 203 | 204 | persona_tester = PersonaTester( 205 | model=MODEL, 206 | client=_client, 207 | comparation_path="videos_to_compare.json", 208 | ) 209 | 210 | titles_results = persona_tester.test_multiples_videos( 211 | titles, 212 | checks=50, 213 | use_extra_titles=True, 214 | ) 215 | 216 | with open("ignore_final_video_titles.json", "w", encoding="utf-8") as file: 217 | file.write(json.dumps(titles_results)) 218 | -------------------------------------------------------------------------------- /agents/persona_testing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the TestingWithPersona class, which is used to test video titles 3 | """ 4 | 5 | import random 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | try: 9 | from utils import try_to_load_json 10 | except ImportError: 11 | from .utils import try_to_load_json 12 | 13 | 14 | class PersonaTester: 15 | """ 16 | Class to test video titles with different personas. 17 | """ 18 | 19 | def __init__(self, client, model, comparation_path): 20 | """ 21 | Initialize the TestingWithPersona class. 22 | """ 23 | self._model = model 24 | self._client = client 25 | with open(comparation_path, "r", encoding="utf-8") as file: 26 | comparations = try_to_load_json(self._client, self._model, file.read()) 27 | self.comparation = comparations 28 | 29 | def test_video(self, titles_to_compare, title_to_test, persona): 30 | """ 31 | Test the title against a list of titles to compare. 32 | """ 33 | videos = titles_to_compare.copy() 34 | videos.append(title_to_test) 35 | random.shuffle(videos) 36 | title_to_test_index = videos.index(title_to_test) + 1 37 | videos_str = "\n".join( 38 | [f"{idx + 1}. {video}" for idx, video in enumerate(videos)] 39 | ) 40 | prompt = f""" 41 | You are {persona} 42 | You are on your youtube feed and some videos are showing 43 | 44 | {videos_str} 45 | 46 | Which video do you click? Respond only one number inside "", nothing else 47 | eg. "" 48 | """ 49 | response = self._client.chat.completions.create( 50 | model=self._model, 51 | messages=[{"role": "user", "content": prompt}], 52 | ) 53 | selected = response.choices[0].message.content.strip() 54 | return f'"{title_to_test_index}"' in selected 55 | 56 | def test_multiples_videos(self, titles, checks=100, use_extra_titles=False): 57 | """ 58 | Test multiple videos with different personas. 
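        Each title is shown to `checks` randomly chosen personas alongside the comparison titles; returns a list of dicts with title, time_selected, times_shown and percentage keys.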
59 | """ 60 | user_personas = self.comparation["user_personas"] 61 | titles_to_compare = self.comparation["titles"] 62 | 63 | if use_extra_titles: 64 | titles += self.comparation.get("extra_titles", []) 65 | 66 | personas_to_test = random.choices(user_personas, k=checks) 67 | 68 | results_by_title = {title: {"selected": 0, "shown": 0} for title in titles} 69 | tasks = [] 70 | results = [] 71 | 72 | with ThreadPoolExecutor() as executor: 73 | # Enviar todas las tareas concurrentemente. 74 | for title in titles: 75 | for persona in personas_to_test: 76 | future = executor.submit( 77 | self.test_video, titles_to_compare, title, persona 78 | ) 79 | tasks.append((title, future)) 80 | 81 | # Recopilar y agrupar los resultados por título. 82 | for title, future in tasks: 83 | try: 84 | result = future.result() 85 | except Exception as e: 86 | print(f"Error processing title {title}: {e}") 87 | result = False 88 | results_by_title[title]["shown"] += 1 89 | if result: 90 | results_by_title[title]["selected"] += 1 91 | 92 | for title, counts in results_by_title.items(): 93 | percentage = ( 94 | (counts["selected"] / counts["shown"] * 100) 95 | if counts["shown"] > 0 96 | else 0 97 | ) 98 | 99 | print( 100 | "Result: ", 101 | { 102 | "title": title, 103 | "time_selected": counts["selected"], 104 | "times_shown": counts["shown"], 105 | "percentage": percentage, 106 | }, 107 | ) 108 | results.append( 109 | { 110 | "title": title, 111 | "time_selected": counts["selected"], 112 | "times_shown": counts["shown"], 113 | "percentage": percentage, 114 | } 115 | ) 116 | return results 117 | -------------------------------------------------------------------------------- /agents/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv==1.0.1 2 | openai==1.68.2 3 | requests==2.32.3 -------------------------------------------------------------------------------- /agents/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for the agents 3 | """ 4 | 5 | import json 6 | import re 7 | from openai import OpenAI 8 | import requests 9 | 10 | 11 | def try_to_load_json(_client: OpenAI, model: str, json_string: str) -> dict | list: 12 | """ 13 | Try to load a JSON string. If it fails, it will try to fix the JSON string 14 | """ 15 | 16 | json_prompt = """ 17 | This is a JSON string, but it is not well formatted. delete everything that is not JSON, fix any possible formatting issue and return only the JSON string. without text, without explanation, ``` or anything else. 18 | """ 19 | 20 | try: 21 | return json.loads(json_string) 22 | except json.JSONDecodeError: 23 | # Si no se puede cargar como JSON, intenta corregirlo 24 | response = _client.chat.completions.create( 25 | model=model, 26 | messages=[{"role": "user", "content": json_prompt + json_string}], 27 | ) 28 | try: 29 | return json.loads(response.choices[0].message.content.strip()) 30 | except json.JSONDecodeError: 31 | return {} 32 | 33 | 34 | def get_youtube_data(youtube_username): 35 | """ 36 | Get YouTube data from a user's channel. 
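    Scrapes the channel's /videos page, parses the embedded ytInitialData JSON, and returns a list of dicts with title, id, url, thumbnail, published and viewCountText.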
37 | """ 38 | regex = r'""([\sa-zA-Z0-9áéíóúÁÉÍÓÚ]+)""' 39 | replacement = r'"\1"' 40 | 41 | headers = { 42 | "User-Agent": ( 43 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " 44 | "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" 45 | ) 46 | } 47 | 48 | url = f"https://www.youtube.com/{youtube_username}/videos" 49 | page = requests.get(url, timeout=5, headers=headers) 50 | html_str = page.content.decode("utf-8") 51 | 52 | json_string = html_str.split("var ytInitialData = ")[-1].split(";")[0] 53 | cleaned_json_string = json_string.replace("\n", " ").replace("\r", " ") 54 | cleaned_json_string = re.sub(regex, replacement, cleaned_json_string) 55 | json_data = json.loads(cleaned_json_string, strict=False) 56 | 57 | video_list = [] 58 | tabs = json_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"] 59 | for tab in tabs: 60 | if tab.get("tabRenderer", {}).get("title", "").lower() not in [ 61 | "videos", 62 | "vídeos", 63 | "video", 64 | ]: 65 | continue 66 | for video in tab["tabRenderer"]["content"]["richGridRenderer"]["contents"]: 67 | video_data = {} 68 | if "richItemRenderer" not in video: 69 | continue 70 | video_data["title"] = video["richItemRenderer"]["content"]["videoRenderer"][ 71 | "title" 72 | ]["runs"][0]["text"] 73 | video_data["id"] = video["richItemRenderer"]["content"]["videoRenderer"][ 74 | "videoId" 75 | ] 76 | video_data["url"] = f"https://www.youtube.com/watch?v={video_data['id']}" 77 | video_data["thumbnail"] = ( 78 | f"https://img.youtube.com/vi/{video_data['id']}/0.jpg" 79 | ) 80 | video_data["published"] = video["richItemRenderer"]["content"][ 81 | "videoRenderer" 82 | ]["publishedTimeText"]["simpleText"] 83 | video_data["viewCountText"] = video["richItemRenderer"]["content"][ 84 | "videoRenderer" 85 | ]["viewCountText"]["simpleText"] 86 | video_list.append(video_data) 87 | break 88 | return video_list 89 | -------------------------------------------------------------------------------- /agents/videos_to_compare.json: -------------------------------------------------------------------------------- 1 | { 2 | "extra_titles": [ 3 | "🚀 HICE la MEJOR IA para VIDEOJUEGOS [+ 3 DEMOS] 🎮" 4 | ], 5 | "user_personas": [ 6 | "Una persona que busca aprender de tecnologia", 7 | "Un CEO de una empresa que quiere entender mejor la tecnologia", 8 | "Un CTO intentando aprender una tecnologia especifica para su equipo", 9 | "Un Desarrollador buscando una tecnologia especifica", 10 | "Un experto en el tema buscando novedades", 11 | "Un estudiante buscando aprender algo nuevo", 12 | "Un chico aburrido buscando algo interesante", 13 | "Alguien que quiere aprender a programar", 14 | "Un abuelo que no entiende nada de tecnologia", 15 | "Un chico gamer con el sueño en su subconsciente de ser programador de videojuegos" 16 | ], 17 | "titles": [ 18 | "Crea y Modifica imágenes con ChatGPT 🔥 (Studio Ghibli y más)", 19 | "¿Que es MCP? : Conecta tu IA a TODO con Este Protocolo Gratuito (Guía Paso a Paso)", 20 | "¿El fin de Data Science? 🤯: Data Science Agent con Gemini + Colab ¡GRATIS!", 21 | "¡Imágenes Hiperrealistas! Tutorial Completo de Flux.1 con ComfyUI ¡GRATIS!", 22 | "Las IAs de VÍDEO tienen un GRAN PROBLEMA...", 23 | "HOY SÍ vas a entender QUÉ es el BLOCKCHAIN - (Bitcoin, Cryptos, NFTs y", 24 | "¡Aumentando FOTOGRAMAS con Inteligencia Artificial! (SuperFluidez)", 25 | "TUTORIAL 👉 ¡Entrena a la IA con tu CARA! 
- 100% GRATIS Y SIN GPUs (Stable Diffusion", 26 | "¿Cuánto Tarda Esta IA En Aprender A Manejar?", 27 | "Tu primera red neuronal en Python y Tensorflow", 28 | "El Software Que Mató A 346 Personas", 29 | "CREAR nuestra primera RED NEURONAL en C# !!! [Perceptron simple]", 30 | "🤖 Hice un ASISTENTE VIRTUAL POR VOZ!!! ► [chatbot con voz en 15 minutos]", 31 | "3 proyectos de ALGORITMOS GENÉTICOS en C# UNITY ► NO VAS A CREER EL ULTIMO" 32 | ] 33 | } -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "subtitles_clip_config": { 3 | "font": "Hey-Comic", 4 | "fontsize": 60, 5 | "color": "white", 6 | "method": "label", 7 | "align": "south", 8 | "bg_color": "black", 9 | "stroke_color": null, 10 | "stroke_width": null 11 | }, 12 | "subtitles_position": { 13 | "text_position_y_offset": -500, 14 | "text_position_x_offset": 0 15 | }, 16 | "titles_clip_config": { 17 | "font": "Hey-Comic", 18 | "fontsize": 90, 19 | "color": "black", 20 | "method": "label", 21 | "align": "south", 22 | "bg_color": "transparent", 23 | "stroke_color": "black", 24 | "stroke_width": 1.5 25 | }, 26 | "titles_position": { 27 | "text_position_y_offset": 500, 28 | "text_position_x_offset": 0 29 | }, 30 | "titles": [ 31 | "", 32 | "Video completo en la descripcion.", 33 | "Suscribete para mas." 34 | ] 35 | } -------------------------------------------------------------------------------- /config/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "avatars": { 3 | "calm": "config/avatar_calm.mp4", 4 | "angry": "config/avatar_angry.mp4", 5 | "sad": "config/avatar_sad.mp4", 6 | "wow": "config/avatar_wow.mp4", 7 | "smug": "config/avatar_smug.mp4" 8 | }, 9 | "shake_factor": 5 10 | } -------------------------------------------------------------------------------- /config_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to load configuration from a JSON file. 3 | """ 4 | 5 | import json 6 | from pathlib import Path 7 | 8 | 9 | def load_config(config_path: str = "config.json") -> dict: 10 | """ 11 | Load configuration from a JSON file. 12 | """ 13 | config_file = Path(config_path) 14 | if not config_file.exists(): 15 | raise FileNotFoundError(f"Could not find the configuration file: {config_path}") 16 | with config_file.open("r", encoding="utf-8") as f: 17 | config = json.load(f) 18 | return config 19 | 20 | 21 | # Carga la configuración al importar el módulo 22 | config_data = load_config() 23 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Main script for video editing and processing. 
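Exposes the subcommands video_edit, separate_audio, voice, generator and avatar_video_generation; see the README for detailed usage.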
4 | """ 5 | 6 | import argparse 7 | import logging 8 | import json 9 | 10 | from moviepy.editor import VideoFileClip 11 | from operations import ( 12 | add_subtitles, 13 | add_titles, 14 | audio_generator, 15 | denoise_video, 16 | generate_transcript, 17 | generate_video_base, 18 | save_joined_video, 19 | save_separated_video, 20 | save_video, 21 | set_horizontal, 22 | set_vertical, 23 | trim_by_silence, 24 | video_translation, 25 | generate_avatar_video, 26 | generate_transcript_divided, 27 | ) 28 | from config_loader import config_data 29 | from utils import get_audio, get_video_data, str2bool 30 | 31 | logging.basicConfig(level=logging.INFO) 32 | logger = logging.getLogger(__name__) 33 | 34 | # Dictionary for functions in the "video_edit" command 35 | functions_dict = { 36 | "trim_by_silence": trim_by_silence, 37 | "denoise": denoise_video, 38 | "transcript": generate_transcript, 39 | "transcript_divided": generate_transcript_divided, 40 | "subtitles": add_subtitles, 41 | "save_separated_video": save_separated_video, 42 | "save_join": save_joined_video, 43 | "save_video": save_video, 44 | "set_vertical": set_vertical, 45 | "set_horizontal": set_horizontal, 46 | } 47 | 48 | 49 | def video_edit_command(args): 50 | """Executes a sequence of operations for video editing.""" 51 | for input_file in args.input_file: 52 | kwargs = { 53 | "video_path": input_file, 54 | "clip_interval": args.clip_interval, 55 | "sound_threshold": args.sound_threshold, 56 | "discard_silence": args.discard_silence, 57 | "config_data": config_data, 58 | } 59 | kwargs = get_video_data(**kwargs) 60 | for step in args.pipeline: 61 | if step not in functions_dict: 62 | raise ValueError( 63 | f"Function {step} not found. \ 64 | Available options: {', '.join(functions_dict.keys())}" 65 | ) 66 | logger.info("Applying %s to %s", step, input_file) 67 | kwargs = functions_dict[step](**kwargs) 68 | 69 | 70 | def separate_audio_command(args): 71 | """Separates audio from video files.""" 72 | for file in args.files: 73 | clip = VideoFileClip(file) 74 | audio_path = get_audio(clip, file[:-4]) 75 | logger.info("Audio saved to: %s", audio_path) 76 | 77 | 78 | def voice_command(args): 79 | """Performs voice operations: video translation or audio generation.""" 80 | if args.operation == "video_translation": 81 | logger.info("Starting video translation...") 82 | video_translation(args.video_path, args.translate, args.language) 83 | elif args.operation == "audio_generator": 84 | logger.info("Starting audio generation...") 85 | audio_generator(args.video_path, args.voice) 86 | else: 87 | logger.error("Invalid operation. 
Use --help for more information.") 88 | 89 | 90 | def generator_command(args): 91 | """Generates a base video or adds titles to a short video.""" 92 | tools = { 93 | "base": generate_video_base, 94 | "add_titles": add_titles, 95 | } 96 | for file in args.files: 97 | if args.tool not in tools: 98 | logger.error( 99 | "Tool %s not found, available options: %s", 100 | args.tool, 101 | ", ".join(tools.keys()), 102 | ) 103 | continue 104 | tools[args.tool](file) 105 | 106 | 107 | def video_gen_avatar_command(args): 108 | """Generates a video with avatars based on emotions.""" 109 | 110 | config = None 111 | with open(args.config, "r", encoding="utf-8") as f: 112 | config = json.load(f) 113 | print(config) 114 | 115 | for file in args.files: 116 | generate_avatar_video(file, config) 117 | 118 | 119 | def main(): 120 | """ 121 | Main function to parse arguments 122 | """ 123 | parser = argparse.ArgumentParser( 124 | description="Combined program for video editing and processing" 125 | ) 126 | subparsers = parser.add_subparsers(dest="command", required=True) 127 | 128 | # Subcommand for video editing 129 | parser_edit = subparsers.add_parser( 130 | "video_edit", help="Multiple tools for video editing" 131 | ) 132 | parser_edit.add_argument( 133 | "input_file", type=str, nargs="+", help="Video file(s) to process" 134 | ) 135 | parser_edit.add_argument( 136 | "--pipeline", 137 | type=str, 138 | nargs="+", 139 | help=f"Functions to apply to the video: {', '.join(functions_dict.keys())}", 140 | ) 141 | parser_edit.add_argument( 142 | "-c", "--clip_interval", type=float, default=2, help="Clipping precision" 143 | ) 144 | parser_edit.add_argument( 145 | "-s", 146 | "--sound_threshold", 147 | type=float, 148 | default=0.01, 149 | help="Maximum volume threshold to consider silence", 150 | ) 151 | parser_edit.add_argument( 152 | "-d", 153 | "--discard_silence", 154 | const=True, 155 | default=False, 156 | type=str2bool, 157 | nargs="?", 158 | help="Discard silent clips", 159 | ) 160 | parser_edit.set_defaults(func=video_edit_command) 161 | 162 | # Subcommand for separate_audio 163 | parser_separate = subparsers.add_parser( 164 | "separate_audio", help="Separate audio from video" 165 | ) 166 | parser_separate.add_argument("files", type=str, nargs="+", help="Video file(s)") 167 | parser_separate.set_defaults(func=separate_audio_command) 168 | 169 | # Subcommand for voice operations 170 | parser_voice = subparsers.add_parser( 171 | "voice", help="Voice operations: translation or audio generation" 172 | ) 173 | parser_voice.add_argument( 174 | "operation", 175 | type=str, 176 | help="Operation to perform: video_translation or audio_generator", 177 | ) 178 | parser_voice.add_argument( 179 | "video_path", type=str, help="Path to the video file to process" 180 | ) 181 | parser_voice.add_argument( 182 | "-t", 183 | "--translate", 184 | type=str, 185 | default="Helsinki-NLP/opus-mt-es-en", 186 | help="Translation model to use", 187 | ) 188 | parser_voice.add_argument( 189 | "--voice", 190 | type=str, 191 | default="en-us/af_heart", 192 | help="Voice to use for translation", 193 | ) 194 | parser_voice.add_argument( 195 | "--language", 196 | type=str, 197 | default="en", 198 | help="Language for translation (default: en)", 199 | ) 200 | parser_voice.set_defaults(func=voice_command) 201 | 202 | # Subcommand for generator (short video) 203 | parser_generator = subparsers.add_parser("generator", help="Short video generator") 204 | parser_generator.add_argument( 205 | "files", type=str, nargs="+", help="File(s) to 
process" 206 | ) 207 | parser_generator.add_argument( 208 | "tool", 209 | type=str, 210 | help="Tool to use: base, add_titles", 211 | ) 212 | parser_generator.set_defaults(func=generator_command) 213 | 214 | # Subcommand for avatar video generation 215 | parser_avatar = subparsers.add_parser( 216 | "avatar_video_generation", help="Avatar video generation" 217 | ) 218 | parser_avatar.add_argument("files", type=str, nargs="+", help="File(s) to process") 219 | parser_avatar.add_argument( 220 | "config", type=str, help="Path to the configuration file" 221 | ) 222 | parser_avatar.set_defaults(func=video_gen_avatar_command) 223 | 224 | args = parser.parse_args() 225 | args.func(args) 226 | 227 | 228 | if __name__ == "__main__": 229 | main() 230 | -------------------------------------------------------------------------------- /operations/__init__.py: -------------------------------------------------------------------------------- 1 | from .denoise import * 2 | from .save import * 3 | from .set_orientation import * 4 | from .subtitles import * 5 | from .transcript import * 6 | from .trim import * 7 | from .translation import video_translation, audio_generator 8 | from .shorts import generate_video_base, add_titles 9 | from .avatar_video_generation import generate_avatar_video 10 | -------------------------------------------------------------------------------- /operations/avatar_video_generation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generating an avatar video based on audio input. 3 | """ 4 | 5 | import logging 6 | import os 7 | import subprocess 8 | from concurrent.futures import ThreadPoolExecutor, as_completed 9 | 10 | from dotenv import load_dotenv 11 | import moviepy.editor as mpe 12 | import numpy as np 13 | from openai import OpenAI 14 | 15 | from utils import apply_shake, get_subclip_volume_segment 16 | 17 | SYSTEM_PROMPT = ( 18 | "You are an emotion classifier. " 19 | "Given a short phrase in any language, reply with exactly one of the following labels: " 20 | "{emotions}" 21 | "Respond with just the label, no extra text." 22 | ) 23 | 24 | load_dotenv() 25 | OPENAI_MODEL = os.getenv("OPENAI_MODEL", "GPT-4.1") 26 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 27 | OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1") 28 | _client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE) 29 | 30 | WHISPER_MODEL_SIZE = os.getenv("WHISPER_MODEL_SIZE", "turbo") 31 | 32 | logging.basicConfig(level=logging.INFO) 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | def _classify_emotion(text: str, config: dict) -> str: 37 | """ 38 | Use ChatGPT to classify the emotion of a given text. 
36 | def _classify_emotion(text: str, config: dict) -> str:
37 |     """
38 |     Use ChatGPT to classify the emotion of a given text.
39 |     """
40 |     emotion_map: dict = config.get("avatars", {})
41 |     try:
42 |         prompt = SYSTEM_PROMPT.replace("{emotions}", ", ".join(emotion_map.keys()))
43 |         response = _client.chat.completions.create(
44 |             model=OPENAI_MODEL,
45 |             messages=[
46 |                 {"role": "system", "content": prompt},
47 |                 {"role": "user", "content": text},
48 |             ],
49 |         )
50 |         label = response.choices[0].message.content.strip().lower()
51 |         for key in emotion_map.keys():
52 |             if key.lower() in label:
53 |                 return key
54 |         logger.warning("Received unexpected label '%s', falling back to the first configured emotion", label)
55 |         return list(emotion_map.keys())[0]
56 |     except Exception as e:
57 |         logger.error("ChatGPT API error: %s", e)
58 |         return list(emotion_map.keys())[0]
59 | 
60 | 
61 | def _process_transcript_segment(seg, pydub_audio, config):
62 |     """
63 |     Classify emotion and calculate the volume for the audio segment.
64 |     """
65 |     start, end, text = seg.start, seg.end, seg.text.strip()
66 |     label = _classify_emotion(text, config)
67 |     duration = end - start
68 |     volume = get_subclip_volume_segment(pydub_audio, start, duration)
69 |     logger.info(
70 |         "Segment %f-%fs, Text: '%s', Emotion: %s, Volume: %f",
71 |         start,
72 |         end,
73 |         text,
74 |         label,
75 |         volume,
76 |     )
77 |     return {"start": start, "end": end, "emotion": label, "volume": volume}
78 | 
79 | 
80 | def generate_segment(audio_path, audio_clip, config: dict, max_workers=None):
81 |     """
82 |     Generate segments based on audio input.
83 |     Each segment is classified with a dynamic set of emotions via ChatGPT.
84 |     """
85 |     try:
86 |         from faster_whisper import WhisperModel
87 |         from pydub import AudioSegment
88 |     except ImportError as e:
89 |         logger.error("Error importing required libraries: %s", e)
90 |         return [], 0
91 | 
92 |     emotion_map: dict = config.get("avatars", {})
93 | 
94 |     logger.info("Loading audio (pydub)...")
95 |     pydub_audio = AudioSegment.from_file(audio_path)
96 |     logger.info("Transcribing audio...")
97 |     whisper_model = WhisperModel(WHISPER_MODEL_SIZE)
98 |     result, _ = whisper_model.transcribe(audio_path, multilingual=True)
99 | 
100 |     with ThreadPoolExecutor(max_workers=max_workers) as executor:
101 |         futures = [
102 |             executor.submit(_process_transcript_segment, seg, pydub_audio, config)
103 |             for seg in result
104 |         ]
105 |         segments = sorted((f.result() for f in as_completed(futures)), key=lambda s: s["start"])  # as_completed yields out of order; restore chronology
106 | 
107 |     volumes = [s["volume"] for s in segments]
108 |     global_avg_volume = np.mean(volumes) if volumes else 0
109 |     logger.info("Global average volume: %f", global_avg_volume)
110 | 
111 |     last_end = max((s["end"] for s in segments), default=0)  # pad from the end of the last spoken segment
112 |     if last_end < audio_clip.duration:
113 |         segments.append(
114 |             {
115 |                 "start": last_end,
116 |                 "end": audio_clip.duration,
117 |                 "emotion": list(emotion_map.keys())[0],
118 |                 "volume": global_avg_volume,
119 |             }
120 |         )
121 | 
122 |     del whisper_model
123 |     return segments, global_avg_volume
124 | 
125 | 
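For reference, generate_segment returns plain dicts sorted by start time, plus the global average volume. A hypothetical two-segment result (all numbers invented) would look like:

segments = [
    {"start": 0.0, "end": 2.4, "emotion": "happy", "volume": 0.031},
    {"start": 2.4, "end": 5.1, "emotion": "neutral", "volume": 0.018},
]
global_avg_volume = 0.0245

The renderer defined next scales its shake as (volume / global_avg_volume) * shake_factor, so the louder first segment above would shake the hardest.
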
126 | def _render_avatar_segment(i, seg, avatar_path, shake_factor, global_avg_volume):
127 |     """
128 |     Load the avatar, apply the loop + shake and write the output file.
129 |     """
130 |     start, end = seg["start"], seg["end"]
131 |     volume = seg["volume"]
132 |     intensity = (
133 |         (volume / global_avg_volume) * shake_factor if global_avg_volume > 0 else 0
134 |     )
135 | 
136 |     avatar_video = mpe.VideoFileClip(avatar_path).without_audio()
137 |     avatar_segment = avatar_video.loop(duration=end - start)
138 |     avatar_segment = apply_shake(avatar_segment, intensity)
139 | 
140 |     out_name = f"temp_{i:03d}_{os.path.splitext(os.path.basename(avatar_path))[0]}.mp4"
141 |     logger.info("Rendering segment %d (%s) → %s", i, seg["emotion"], out_name)
142 |     avatar_segment.write_videofile(
143 |         out_name, codec="libx264", audio=False, verbose=False, logger=None
144 |     )
145 |     return out_name
146 | 
147 | 
148 | def concatenate_segments_ffmpeg(segment_paths, final_output, audio_path):
149 |     """
150 |     Use FFmpeg to concatenate segments and add the audio track.
151 |     First concatenate the segments without audio and then incorporate the original audio.
152 |     """
153 |     list_filename = "segments.txt"
154 |     with open(list_filename, "w", encoding="utf-8") as f:
155 |         for path in segment_paths:
156 |             f.write(f"file '{os.path.abspath(path)}'\n")
157 | 
158 |     temp_video = "temp_video.mp4"
159 |     subprocess.run(
160 |         [
161 |             "ffmpeg",
162 |             "-f",
163 |             "concat",
164 |             "-safe",
165 |             "0",
166 |             "-i",
167 |             list_filename,
168 |             "-c",
169 |             "copy",
170 |             temp_video,
171 |         ],
172 |         check=True,
173 |     )
174 | 
175 |     subprocess.run(
176 |         [
177 |             "ffmpeg",
178 |             "-i",
179 |             temp_video,
180 |             "-i",
181 |             audio_path,
182 |             "-c:v",
183 |             "copy",
184 |             "-c:a",
185 |             "aac",
186 |             "-map",
187 |             "0:v:0",
188 |             "-map",
189 |             "1:a:0",
190 |             final_output,
191 |         ],
192 |         check=True,
193 |     )
194 | 
195 |     # Cleanup temporary files
196 |     os.remove(list_filename)
197 |     os.remove(temp_video)
198 |     for path in segment_paths:
199 |         os.remove(path)
200 |     logger.info("Final video exported to %s", final_output)
201 | 
202 | 
203 | def generate_avatar_video(audio_path: str, config: dict, max_workers=None):
204 |     """
205 |     Process audio to generate a video with avatars based on emotions.
206 |     """
207 |     avatars = config.get("avatars", {})
208 |     shake_factor = config.get("shake_factor", 0.1)
209 | 
210 |     try:
211 |         clip = mpe.VideoFileClip(audio_path)
212 |         audio_clip = clip.audio
213 |     except Exception:
214 |         audio_clip = mpe.AudioFileClip(audio_path)
215 |         clip = None
216 | 
217 |     segments, global_avg_volume = generate_segment(
218 |         audio_path, audio_clip, config, max_workers
219 |     )
220 | 
221 |     tasks = []
222 |     for i, seg in enumerate(segments):
223 |         avatar_path = avatars.get(seg["emotion"])
224 |         if not avatar_path:
225 |             logger.warning("No avatar for emotion '%s'. Skipping.", seg["emotion"])
226 |             continue
227 |         tasks.append((i, seg, avatar_path))
228 | 
229 |     segment_paths = []
230 |     with ThreadPoolExecutor(max_workers=max_workers) as executor:
231 |         futures = {
232 |             executor.submit(
233 |                 _render_avatar_segment,
234 |                 i,
235 |                 seg,
236 |                 avatar_path,
237 |                 shake_factor,
238 |                 global_avg_volume,
239 |             ): (i, avatar_path)
240 |             for i, seg, avatar_path in tasks
241 |         }
242 |         for future in as_completed(futures):
243 |             segment_paths.append(future.result())
244 |     segment_paths.sort()  # zero-padded temp_{i:03d} names restore chronological order
245 |     if not segment_paths:
246 |         logger.error("No clips generated. 
Exiting.") 247 | return 248 | 249 | final_output = "output_video.mp4" 250 | concatenate_segments_ffmpeg(segment_paths, final_output, audio_path) 251 | logger.info("Final video saved on: %s", final_output) 252 | -------------------------------------------------------------------------------- /operations/denoise.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to denoise audio in a video file using the DNS64 model. 3 | """ 4 | 5 | from moviepy import editor 6 | from utils import get_audio 7 | 8 | 9 | def denoise_video(**kwargs): 10 | """ 11 | Denoise the audio of a video file using the DNS64 model. 12 | """ 13 | try: 14 | import torch 15 | import torchaudio 16 | from denoiser import pretrained 17 | from denoiser.dsp import convert_audio 18 | except ImportError as e: 19 | raise ImportError( 20 | "Please install the required libraries: torch, torchaudio, denoiser" 21 | ) from e 22 | 23 | input_video_file_clip, filename = ( 24 | kwargs["input_video_file_clip"], 25 | kwargs["filename"], 26 | ) 27 | audio_file_name = get_audio(input_video_file_clip, filename) 28 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 29 | model = pretrained.dns64().to(device) 30 | wav, source = torchaudio.load(audio_file_name) 31 | wav = convert_audio(wav.to(device), source, model.sample_rate, model.chin) 32 | with torch.no_grad(): 33 | denoised = model(wav[None])[0] 34 | denoised_file_name = f"{filename}_denoised.wav" 35 | torchaudio.save(denoised_file_name, denoised.cpu(), model.sample_rate) 36 | input_video_file_clip.audio = editor.AudioFileClip(denoised_file_name) 37 | kwargs["input_video_file_clip"] = input_video_file_clip 38 | return kwargs 39 | -------------------------------------------------------------------------------- /operations/save.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to save video clips using moviepy. 3 | """ 4 | 5 | from moviepy import editor 6 | 7 | 8 | def save_video(**kwargs): 9 | """ 10 | Save a video clip to a file. 11 | """ 12 | filename = kwargs["filename"] 13 | input_video_file_clip = kwargs["input_video_file_clip"] 14 | clip_name = f"{filename}_EDITED.mp4" 15 | input_video_file_clip.write_videofile( 16 | clip_name, 17 | audio_codec="aac", 18 | threads=8, 19 | fps=24, 20 | ) 21 | kwargs["clips_name"] = clip_name 22 | return kwargs 23 | 24 | 25 | def save_joined_video(**kwargs): 26 | """ 27 | Save a joined video clip to a file. 28 | """ 29 | if "clips" not in kwargs: 30 | return save_video(**kwargs) 31 | filename = kwargs["filename"] 32 | clips = kwargs["clips"] 33 | clip_name = f"{filename}_EDITED.mp4" 34 | if isinstance(clips, list): 35 | concat_clip = editor.concatenate_videoclips(clips) 36 | concat_clip.write_videofile( 37 | clip_name, 38 | audio_codec="aac", 39 | threads=8, 40 | fps=24, 41 | ) 42 | kwargs["clips_name"] = clip_name 43 | return kwargs 44 | clips.write_videofile(clip_name, audio_codec="aac") 45 | kwargs["clips_name"] = clip_name 46 | return kwargs 47 | 48 | 49 | def save_separated_video(**kwargs): 50 | """ 51 | Save separated video clips to files. 
52 | """ 53 | if "clips" not in kwargs: 54 | return save_video(**kwargs) 55 | filename = kwargs["filename"] 56 | clips = kwargs["clips"] 57 | clips_format = f"{filename}_EDITED_{{i}}.mp4" 58 | for i, clip in enumerate(clips): 59 | pad_i = str(i).zfill(5) 60 | clip.write_videofile(clips_format.format(i=pad_i), audio_codec="aac") 61 | kwargs["clips_name"] = clips_format.format(i="{i}") 62 | return kwargs 63 | -------------------------------------------------------------------------------- /operations/set_orientation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to set the orientation of a video clip. 3 | """ 4 | 5 | 6 | def set_vertical(**kwargs): 7 | """ 8 | Set the orientation of a video clip to vertical. 9 | """ 10 | input_video_file_clip = kwargs["input_video_file_clip"] 11 | width, height = input_video_file_clip.size 12 | if width > height: 13 | new_size = (height, width) 14 | input_video_file_clip = input_video_file_clip.resize(new_size) 15 | kwargs["shape"] = input_video_file_clip.size 16 | kwargs["input_video_file_clip"] = input_video_file_clip 17 | return kwargs 18 | 19 | 20 | def set_horizontal(**kwargs): 21 | """ 22 | Set the orientation of a video clip to horizontal. 23 | """ 24 | input_video_file_clip = kwargs["input_video_file_clip"] 25 | width, height = input_video_file_clip.size 26 | if width < height: 27 | new_size = (height, width) 28 | input_video_file_clip = input_video_file_clip.resize(new_size) 29 | kwargs["shape"] = input_video_file_clip.size 30 | kwargs["input_video_file_clip"] = input_video_file_clip 31 | return kwargs 32 | -------------------------------------------------------------------------------- /operations/shorts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to generate a video with a blurred background and add titles. 3 | """ 4 | 5 | import os 6 | import logging 7 | from pathlib import Path 8 | 9 | from moviepy import editor 10 | from moviepy.editor import ColorClip, CompositeVideoClip, VideoFileClip 11 | 12 | from config_loader import config_data 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def blur_video(video_path: str) -> str: 19 | """ 20 | Blurs the video and saves it with a new name. 21 | """ 22 | new_video_path = f"blurred_{Path(video_path).name}" 23 | with VideoFileClip(video_path) as video: 24 | video_wo_audio = video.without_audio() 25 | video_wo_audio.write_videofile( 26 | new_video_path, 27 | ffmpeg_params=["-vf", "boxblur=10:1"], 28 | preset="ultrafast", 29 | threads=8, 30 | fps=24, 31 | codec="libx264", 32 | ) 33 | return new_video_path 34 | 35 | 36 | def generate_video_base(video_path_data: str, video_size=(1080, 1920)): 37 | """ 38 | Generates a base video with a blurred background and the original video on top. 
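Note that set_vertical and set_horizontal above swap the frame's width and height via resize, which stretches the picture rather than reframing it. If cropping is preferred, a sketch using moviepy's crop effect follows; this is an alternative approach under that assumption, not what the repo's operations do, and the helper name is hypothetical:

from moviepy.video.fx.all import crop

def center_crop_vertical(clip, aspect=9 / 16):
    # Keep a centered vertical window; content at the left/right edges is lost.
    width, height = clip.size
    return crop(clip, x_center=width / 2, width=int(height * aspect), height=height)
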
39 |     """
40 |     video_path_output = f"output_{Path(video_path_data).name}"
41 |     blurred_video_name = blur_video(video_path_data)
42 |     blurred_video = VideoFileClip(blurred_video_name).resize(height=video_size[1])
43 |     video = VideoFileClip(video_path_data).resize(width=video_size[0])
44 |     video_base = ColorClip(video_size, color=(0, 0, 0)).set_duration(video.duration)
45 |     composite = CompositeVideoClip(
46 |         [video_base, blurred_video.set_position("center"), video.set_position("center")]
47 |     ).set_duration(video.duration)
48 |     composite.write_videofile(
49 |         video_path_output, preset="ultrafast", threads=8, fps=24, codec="libx264"
50 |     )
51 |     os.remove(blurred_video_name)
52 |     logger.info("Base video generated at %s", video_path_output)
53 | 
54 | 
55 | def add_titles(video_path: str):
56 |     """
57 |     Adds titles to the video.
58 |     """
59 |     video = VideoFileClip(video_path)
60 |     title_clips = []
61 |     duration = 3  # Duration of each title card, in seconds
62 |     for title in config_data.get("titles", []):
63 |         if not title or not title.strip():
64 |             continue
65 |         title_clip = editor.TextClip(
66 |             title, **config_data["titles_clip_config"]
67 |         ).set_duration(duration)
68 |         pos = ("center", config_data["titles_position"]["text_position_y_offset"])
69 |         title_clip = title_clip.set_position(pos)
70 |         title_clips.append(title_clip)
71 |     if not title_clips:
72 |         logger.info("No titles to add.")
73 |         return
74 | 
75 |     final_clip = editor.concatenate_videoclips(title_clips + [video])
76 |     output_path = f"output_titles_{Path(video_path).name}"
77 |     final_clip.write_videofile(
78 |         output_path, preset="ultrafast", threads=8, fps=24, codec="libx264"
79 |     )
80 |     logger.info("Video with titles saved at: %s", output_path)
81 | 
--------------------------------------------------------------------------------
/operations/subtitles.py:
--------------------------------------------------------------------------------
1 | """
2 | Module to add subtitles to a video using moviepy.
3 | """
4 | 
5 | import os
6 | from moviepy.editor import TextClip, CompositeVideoClip
7 | from moviepy.video.tools.subtitles import SubtitlesClip
8 | 
9 | 
10 | def add_subtitles(**kwargs):
11 |     """
12 |     Add subtitles to a video clip.
13 |     """
14 | 
15 |     def generator(txt):
16 |         return TextClip(txt, **config_data["subtitles_clip_config"])
17 | 
18 |     filename = kwargs["filename"]
19 |     input_video_file_clip = kwargs["input_video_file_clip"]
20 |     subtitles_filename = kwargs.get(
21 |         "transcript_file_name", f"{filename}_transcript.srt"
22 |     )
23 |     config_data = kwargs.get("config_data", {})
24 |     if not os.path.exists(subtitles_filename):
25 |         subtitles_filename = f"{filename}_transcript.srt"
26 | 
27 |     subtitles = SubtitlesClip(subtitles_filename, generator)
28 |     video_list = [
29 |         input_video_file_clip,
30 |         subtitles.set_pos(
31 |             (
32 |                 "center",
33 |                 input_video_file_clip.h
34 |                 + config_data["subtitles_position"]["text_position_y_offset"],
35 |             )
36 |         ),
37 |     ]
38 |     video_with_subs = CompositeVideoClip(video_list)
39 |     kwargs["input_video_file_clip"] = video_with_subs
40 |     return kwargs
41 | 
--------------------------------------------------------------------------------
/operations/transcript.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains functions to generate transcripts from video files.
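add_titles and add_subtitles above read all their styling from config_data; the keys actually consumed are titles, titles_clip_config, titles_position.text_position_y_offset, subtitles_clip_config, and subtitles_position.text_position_y_offset. A sketch of the matching config fragment, where the TextClip options are ordinary moviepy kwargs chosen for illustration rather than values shipped with the repo:

{
    "titles": ["My killer video title"],
    "titles_clip_config": {"fontsize": 70, "color": "white", "font": "Arial"},
    "titles_position": {"text_position_y_offset": -500},
    "subtitles_clip_config": {"fontsize": 42, "color": "white", "font": "Arial"},
    "subtitles_position": {"text_position_y_offset": -250}
}
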
3 | """ 4 | 5 | from faster_whisper import WhisperModel 6 | from utils import get_audio, float_to_srt_time 7 | 8 | 9 | MODEL_SIZE = "turbo" 10 | 11 | 12 | def generate_transcript(**kwargs): 13 | """ 14 | Generates a transcript from the input video file and saves it as an SRT file. 15 | """ 16 | input_video_file_clip, filename = ( 17 | kwargs["input_video_file_clip"], 18 | kwargs["filename"], 19 | ) 20 | audio_file_name = get_audio(input_video_file_clip, filename) 21 | model = WhisperModel(MODEL_SIZE) 22 | segments, _ = model.transcribe(audio_file_name, multilingual=True) 23 | transcript = "" 24 | for segment in segments: 25 | start_time = float_to_srt_time(segment.start) 26 | end_time = float_to_srt_time(segment.end) 27 | text_data = segment.text.strip() 28 | transcript += f"{segment.id + 1}\n{start_time} --> {end_time}\n{text_data}\n\n" 29 | transcript_file_name = f"{filename}_transcript.srt" 30 | with open(transcript_file_name, "w", encoding="utf-8") as file: 31 | file.write(transcript) 32 | kwargs["transcript_file_name"] = transcript_file_name 33 | return kwargs 34 | 35 | 36 | def generate_transcript_divided(**kwargs): 37 | """ 38 | Generates a transcript from the input video file and saves it as an SRT file. 39 | The transcript is divided into segments based on word timestamps. 40 | """ 41 | input_video_file_clip, filename = ( 42 | kwargs["input_video_file_clip"], 43 | kwargs["filename"], 44 | ) 45 | audio_file_name = get_audio(input_video_file_clip, filename) 46 | model = WhisperModel(MODEL_SIZE) 47 | segments, _ = model.transcribe( 48 | audio_file_name, multilingual=True, word_timestamps=True 49 | ) 50 | transcript = "" 51 | segment_id = 1 52 | 53 | for segment in segments: 54 | for word in segment.words: 55 | start_time = float_to_srt_time(word.start) 56 | end_time = float_to_srt_time(word.end) 57 | text_data = word.word.strip() 58 | segment_id += 1 59 | transcript += f"{segment_id}\n{start_time} --> {end_time}\n{text_data}\n\n" 60 | 61 | transcript_file_name = f"{filename}_transcript.srt" 62 | with open(transcript_file_name, "w", encoding="utf-8") as file: 63 | file.write(transcript) 64 | kwargs["transcript_file_name"] = transcript_file_name 65 | return kwargs 66 | -------------------------------------------------------------------------------- /operations/translation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for video translation and audio generation. 3 | """ 4 | 5 | import os 6 | import json 7 | import logging 8 | from pathlib import Path 9 | from faster_whisper import WhisperModel 10 | from kokoro import KPipeline 11 | from transformers import pipeline 12 | import soundfile as sf 13 | from moviepy.editor import AudioFileClip, CompositeAudioClip, VideoFileClip 14 | from pydub import AudioSegment 15 | 16 | from utils import get_audio 17 | 18 | logging.basicConfig(level=logging.INFO) 19 | logger = logging.getLogger(__name__) 20 | 21 | MODEL_SIZE = "turbo" 22 | MAX_PAUSE = 1.0 23 | 24 | 25 | def process_transcript(segments): 26 | """ 27 | Process the transcript results to group words into phrases based on pauses. 
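generate_transcript above writes standard SRT: a running cue id, a start --> end range in HH:MM:SS,mmm form (the format float_to_srt_time produces), then the text, with a blank line between cues. The first cues of an output file would look like this, with invented content:

1
00:00:00,000 --> 00:00:02,400
Hello and welcome back to the channel.

2
00:00:02,400 --> 00:00:05,100
Today we trim silence automatically.

generate_transcript_divided emits the same format with one word per cue, which suits the word-by-word subtitles used for shorts.
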
28 |     """
29 | 
30 |     audio_info = []
31 |     segment_id = 1
32 |     max_pause = MAX_PAUSE  # Threshold in seconds that marks the end of a phrase
33 | 
34 |     # Accumulators for the phrase being built across segments
35 |     current_phrase_words = []
36 |     phrase_start = None
37 |     phrase_end = None
38 |     last_word_end = None
39 | 
40 |     for segment in segments:
41 |         # Skip segments that carry no word-level timestamps
42 |         if not hasattr(segment, "words") or not segment.words:
43 |             continue
44 |         for word in segment.words:
45 |             # First word overall: start accumulating a new phrase
46 |             if phrase_start is None:
47 |                 phrase_start = word.start
48 |                 phrase_end = word.end
49 |                 current_phrase_words.append(word.word.strip())
50 |                 last_word_end = word.end
51 |             else:
52 |                 # Check the pause between this word's start and the previous word's end
53 |                 if (word.start - last_word_end) > max_pause:
54 |                     # The pause exceeds the threshold: close the current phrase
55 |                     phrase_text = " ".join(current_phrase_words)
56 |                     logger.info(
57 |                         "Phrase: %s | Start: %s | End: %s",
58 |                         phrase_text,
59 |                         phrase_start,
60 |                         phrase_end,
61 |                     )
62 |                     audio_info.append(
63 |                         {
64 |                             "id": segment_id,
65 |                             "original_text": phrase_text,
66 |                             "text": "",
67 |                             "audio_file": "",
68 |                             "start": phrase_start,
69 |                             "end": phrase_end,
70 |                         }
71 |                     )
72 |                     segment_id += 1
73 |                     # Reset the accumulator for the new phrase
74 |                     current_phrase_words = [word.word.strip()]
75 |                     phrase_start = word.start
76 |                     phrase_end = word.end
77 |                 else:
78 |                     # The pause is within the threshold: keep accumulating words
79 |                     current_phrase_words.append(word.word.strip())
80 |                     phrase_end = word.end
81 |             last_word_end = word.end
82 | 
83 |     # Finally, flush the last accumulated phrase, if any
84 |     if current_phrase_words:
85 |         phrase_text = " ".join(current_phrase_words)
86 |         audio_info.append(
87 |             {
88 |                 "id": segment_id,
89 |                 "original_text": phrase_text,
90 |                 "text": "",
91 |                 "audio_file": "",
92 |                 "start": phrase_start,
93 |                 "end": phrase_end,
94 |             }
95 |         )
96 |         segment_id += 1
97 | 
98 |     return audio_info
99 | 
100 | 
101 | def video_translation(
102 |     video_path: str,
103 |     translate_data: str = "Helsinki-NLP/opus-mt-es-en",
104 |     language: str = "en",
105 | ):
106 |     """
107 |     Transcribe and translate the audio from a video file.
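Concretely, with the 1.0-second MAX_PAUSE, word timestamps like the following (invented for illustration) collapse into two phrases; the 1.5 s gap before "Today" is what closes the first one:

# word      start   end
# "Thank"   0.00    0.30
# "you"     0.35    0.60
# "Today"   2.10    2.50   <- 2.10 - 0.60 = 1.50 > 1.0, so a new phrase begins
# "we"      2.55    2.70
#
# phrase 1: "Thank you"  (start=0.00, end=0.60)
# phrase 2: "Today we"   (start=2.10, end=2.70)
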
108 | """ 109 | translator = None 110 | if translate_data: 111 | translator = pipeline("translation", translate_data) 112 | 113 | video_stem = Path(video_path).stem 114 | input_video_file_clip = VideoFileClip(video_path) 115 | audio_path = get_audio(input_video_file_clip, video_stem) 116 | whisper_model = WhisperModel(MODEL_SIZE, num_workers=4, compute_type="int8") 117 | transcribe_params = { 118 | "audio": audio_path, 119 | "language": language, 120 | "multilingual": True, 121 | "temperature": 0.2, 122 | "word_timestamps": True, 123 | } 124 | results, _ = whisper_model.transcribe(**transcribe_params) 125 | audio_info = process_transcript(results) 126 | 127 | if translator: 128 | for segment in audio_info: 129 | segment["text"] = translator(segment["original_text"])[0][ 130 | "translation_text" 131 | ] 132 | logger.info( 133 | "Translating: %s | %s", 134 | segment["original_text"].strip(), 135 | segment["text"].strip(), 136 | ) 137 | 138 | else: 139 | for segment in audio_info: 140 | segment["text"] = segment["original_text"] 141 | 142 | json_file = f"{video_stem}_audio_info.json" 143 | 144 | with open(json_file, "w", encoding="utf-8") as f: 145 | json.dump(audio_info, f, ensure_ascii=False, indent=4) 146 | logger.info("Audio info saved in: %s. Check it before generating audio.", json_file) 147 | 148 | 149 | def change_audio_speed(audio_file: str, speed: float) -> str: 150 | """ 151 | Change the speed of an audio file and save it as a new file. 152 | """ 153 | base = Path(audio_file).stem 154 | output_file = f"{base}_edited.wav" 155 | speed = min(max(speed, 1), 1.4) 156 | logger.info("Changing speed to: %s of file: %s", speed, audio_file) 157 | if speed == 1: 158 | return audio_file 159 | 160 | sound = AudioSegment.from_file(audio_file) 161 | new_sound = sound.speedup(playback_speed=speed) 162 | new_sound.export(output_file, format="wav") 163 | 164 | return output_file 165 | 166 | 167 | def audio_generator(video_path: str, voice_info: str = "en-us/af_heart"): 168 | """ 169 | Generate audio for a video using the specified voice. 
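The {stem}_audio_info.json written by video_translation above is the hand-off point: it is meant to be reviewed, and edited if needed, before audio_generator runs. Each entry carries the fields assembled in process_transcript; a sample with invented content:

[
    {
        "id": 1,
        "original_text": "Hola a todos",
        "text": "Hello everyone",
        "audio_file": "",
        "start": 0.0,
        "end": 1.8
    }
]

audio_generator fills in audio_file as it synthesizes each segment, so a partially processed file can be resumed.
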
170 |     """
171 |     lang_code = voice_info.split("/")[0]
172 |     voice = voice_info.split("/")[1]
173 |     vpipeline = KPipeline(lang_code=lang_code)
174 | 
175 |     video_stem = Path(video_path).stem
176 |     json_file = f"{video_stem}_audio_info.json"
177 |     with open(json_file, "r", encoding="utf-8") as openfile:
178 |         audio_clips = json.load(openfile)
179 |     for segment in audio_clips:
180 |         if segment["audio_file"]:
181 |             continue
182 |         if not segment["text"]:
183 |             logger.info("Skipping empty text segment.")
184 |             continue
185 |         audio_file = f"{video_stem}_generated_audio_{segment['id']}.wav"
186 |         logger.info("Generating audio for: %s in %s", segment["text"], audio_file)
187 | 
188 |         speed = len(segment["text"]) / len(segment["original_text"])
189 | 
190 |         generator = vpipeline(
191 |             segment["text"].replace("\n", ""),
192 |             voice=voice,
193 |             speed=speed,
194 |             split_pattern=r"\n+",
195 |         )
196 |         for _, _, audio in generator:  # kokoro yields (graphemes, phonemes, audio); keep only the first chunk
197 |             sf.write(audio_file, audio, 24000)
198 |             break
199 |         segment["audio_file"] = audio_file
200 |         with open(json_file, "w", encoding="utf-8") as outfile:
201 |             json.dump(audio_clips, outfile, indent=2)
202 |     input_video_file_clip_no_audio = VideoFileClip(video_path).without_audio()
203 |     clips = []
204 |     for item in audio_clips:
205 |         audio = AudioFileClip(item["audio_file"])
206 |         duration = item["end"] - item["start"]
207 |         target_speed = audio.duration / duration
208 |         edited_audio_file = change_audio_speed(item["audio_file"], target_speed)
209 |         audio = AudioFileClip(edited_audio_file).set_start(item["start"])
210 |         clips.append(audio)
211 |     composite_audio = CompositeAudioClip(clips).subclip(
212 |         0, input_video_file_clip_no_audio.duration
213 |     )
214 | 
215 |     final_video = input_video_file_clip_no_audio.set_audio(composite_audio)
216 |     # Ensure the clip keeps its original size
217 |     final_video = final_video.resize(input_video_file_clip_no_audio.size)
218 |     final_video_name = f"{video_stem}_final_video.mp4"
219 |     final_video.write_videofile(
220 |         final_video_name,
221 |         codec="libx264",  # Explicit video codec
222 |         audio_codec="aac",
223 |         ffmpeg_params=["-vf", "scale=iw:ih"],
224 |     )
225 |     logger.info("Final video saved at: %s", final_video_name)
226 | 
227 |     # Remove the generated audio files
228 |     for item in audio_clips:
229 |         try:
230 |             os.remove(item["audio_file"])
231 |             os.remove(item["audio_file"].replace(".wav", "_edited.wav"))
232 |         except Exception as e:
233 |             logger.error("Error removing file: %s", e)
234 | 
--------------------------------------------------------------------------------
/operations/trim.py:
--------------------------------------------------------------------------------
1 | """
2 | Module to trim a video by silence.
3 | """
4 | 
5 | import logging
6 | import numpy as np
7 | 
8 | from utils import get_subclip_volume
9 | 
10 | 
11 | logging.basicConfig(level=logging.INFO)
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | def trim_by_silence(**kwargs):
16 |     """
17 |     Function to trim a video by silence.
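Two speed adjustments compound in audio_generator above: kokoro first speaks at len(text) / len(original_text), then change_audio_speed stretches the clip to fit its original slot, clamped to [1.0, 1.4]. A worked example with invented numbers; note the helper never slows audio down, since ratios below 1.0 are raised to 1.0:

# original_text: 40 chars, text: 50 chars     -> TTS speed = 50 / 40 = 1.25
# generated clip: 2.6 s, original slot: 2.0 s -> target_speed = 2.6 / 2.0 = 1.30
# 1.30 lies inside [1.0, 1.4], so the clip is sped up 1.3x and fits exactly;
# a ratio above 1.4 would be clamped, leaving the audio slightly long.
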
18 | """ 19 | input_video_file_clip = kwargs["input_video_file_clip"] 20 | clip_interval = kwargs["clip_interval"] 21 | sound_threshold = kwargs["sound_threshold"] 22 | discard_silence = kwargs["discard_silence"] 23 | logger.info("Chunking video...") 24 | volumes = [] 25 | for i in np.arange(0, input_video_file_clip.duration, clip_interval): 26 | if input_video_file_clip.duration <= i + clip_interval: 27 | continue 28 | logger.info("Processing chunk %s/%s", i, input_video_file_clip.duration) 29 | 30 | volumes.append(get_subclip_volume(input_video_file_clip, i, clip_interval)) 31 | logger.info("Processing silences...") 32 | volumes = np.array(volumes) 33 | volumes_binary = volumes > sound_threshold 34 | change_times = [0] 35 | for i in range(1, len(volumes_binary)): 36 | if volumes_binary[i] != volumes_binary[i - 1]: 37 | change_times.append(i * clip_interval) 38 | change_times.append(input_video_file_clip.duration) 39 | logger.info("Subclipping...") 40 | first_piece_silence = 1 if volumes_binary[0] else 0 41 | clips = [] 42 | for i in range(1, len(change_times)): 43 | if discard_silence and i % 2 != first_piece_silence: 44 | continue 45 | new_clip = input_video_file_clip.subclip(change_times[i - 1], change_times[i]) 46 | clips.append(new_clip) 47 | kwargs["clips"] = clips 48 | return kwargs 49 | -------------------------------------------------------------------------------- /recipes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script provides a command line interface for various video processing tasks. 4 | """ 5 | import subprocess 6 | import sys 7 | import os 8 | 9 | 10 | def transcribe_video(video): 11 | """ 12 | Transcribes the video using the 'video_transcription' pipeline. 13 | Command: 14 | python main.py video_edit {video} --pipeline transcript 15 | """ 16 | 17 | command = f"python main.py video_edit {video} --pipeline transcript" 18 | subprocess.run(command, shell=True, check=True) 19 | 20 | 21 | def separate_video(video): 22 | """ 23 | Separates the video using the 'trim_by_silence' pipeline. 24 | Command: 25 | python main.py video_edit {video} --pipeline \ 26 | trim_by_silence save_separated_video -c 0.25 -s 0.01 -d True 27 | """ 28 | command = f"python main.py video_edit {video} --pipeline \ 29 | trim_by_silence save_separated_video -c 0.25 -s 0.01 -d True" 30 | subprocess.run(command, shell=True, check=True) 31 | 32 | 33 | def generate_avatar(video): 34 | """ 35 | Generates the video avatar. 36 | Command: 37 | python main.py avatar_video_generation {video} config/config.json 38 | """ 39 | command = f"python main.py avatar_video_generation {video} config/config.json" 40 | subprocess.run(command, shell=True, check=True) 41 | 42 | 43 | def generate_short_base(video): 44 | """ 45 | Generates a short base from the video by chaining several commands: 46 | 1. Divides the transcript: 47 | python main.py video_edit {video} --pipeline transcript_divided 48 | 2. Renames the subtitle file: 49 | mv {base_name}_transcript.srt output_{base_name}_transcript.srt 50 | 3. Generates the base: 51 | python main.py generator {video} base 52 | 4. 
Joins the subtitles: 53 | python main.py video_edit {video} --pipeline subtitles save_join 54 | """ 55 | # Get the base name of the video (without extension) 56 | base_name = os.path.splitext(video)[0] 57 | 58 | command = ( 59 | f"python main.py video_edit {video} --pipeline transcript_divided && " 60 | f"mv {base_name}_transcript.srt output_{base_name}_transcript.srt && " 61 | f"python main.py generator {video} base && " 62 | f"python main.py video_edit output_{video} --pipeline subtitles save_join" 63 | ) 64 | subprocess.run(command, shell=True, check=True) 65 | 66 | 67 | def main(): 68 | """ 69 | Main function to handle command line arguments and execute the appropriate function. 70 | """ 71 | if len(sys.argv) < 3: 72 | print("Usage: python recipes.py