221 | ```
222 |
223 | ### Arguments
224 |
225 | - **files** (required):
226 | One or more files to process.
227 |
228 | - **tool** (required):
229 | The tool to use. Available options:
230 | - `base` – to generate a base video.
231 | - `add_titles` – to add titles to the video.
232 |
233 | ### Example
234 | ```bash
235 | python main.py generator video1.mp4 base
236 | ```
237 | *This command uses the `base` tool on `video1.mp4` to generate a base video.*
238 |
239 | ---
240 |
241 | ## General Help
242 |
243 | To display the help information for the CLI tool or a specific subcommand, use the `--help` flag. For example:
244 | ```bash
245 | python main.py --help
246 | python main.py video_edit --help
247 | ```
248 |
249 | This will display all available options and arguments for that command.
250 |
251 |
252 | ## Project Structure
253 |
254 | - **config_loader.py:** Loads configuration from `config.json` and makes it available throughout the project.
255 | - **main.py:** The central entry point that defines and handles multiple subcommands for video processing.
256 | - **automatic_short_generator.py:** A script to generate short videos using predefined tools.
257 | - **get_data.py:** A utility to traverse directories and concatenate files.
258 | - **utils/**
259 | - **utils.py:** Contains helper functions (e.g., converting strings to booleans, audio extraction, video metadata extraction).
260 | - **operations/**
261 | - **save.py:** Functions to save edited or joined video clips.
262 | - **set_orientation.py:** Adjusts video orientation (vertical/horizontal).
263 | - **subtitles.py:** Adds subtitles to videos.
264 | - **shorts.py:** Generates base videos with effects (e.g., blurred background) and adds title clips.
265 | - **transcript.py:** Generates transcripts using the Whisper model.
266 | - **trim.py:** Implements silence detection and video trimming.
267 | - **translation.py:** Handles video translation and audio generation.
268 | - **denoise.py:** Applies denoising filters using deep learning models.
269 |
270 | ## Configuration
271 |
272 | The toolkit uses a JSON configuration file (`config.json`) to define parameters such as:
273 | - Subtitle and title clip settings (e.g., font, size, position).
274 | - Other customizable options for processing operations.
275 |
276 | Adjust these settings according to your needs before running any commands.
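
As a rough illustration (the key names and values below are assumptions, not the toolkit's actual schema — check `config_loader.py` and your existing `config.json` for the real structure), a minimal configuration file could be generated like this:
```python
import json

# Illustrative sketch only: these keys and values are assumptions,
# not the toolkit's actual schema.
example_config = {
    "subtitles": {"font": "Arial", "fontsize": 48, "position": "bottom"},
    "title": {"font": "Arial-Bold", "fontsize": 72, "position": "center"},
}

with open("config.json", "w", encoding="utf-8") as f:
    json.dump(example_config, f, ensure_ascii=False, indent=2)
```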
277 |
278 | ## Contributing
279 |
280 | Contributions are welcome! If you have suggestions or improvements, feel free to open an issue or submit a pull request.
281 |
282 | ## License
283 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
284 |
285 |
286 |
287 |
288 | Let's connect 😋
--------------------------------------------------------------------------------
/operations/avatar_video_generation.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for video generation with an avatar from audio
3 | """
4 |
5 | import logging
6 | import os
7 | import json
8 | from pathlib import Path
9 | from typing import Any, Dict, List, Optional, Tuple
10 |
11 | from concurrent.futures import ThreadPoolExecutor, as_completed
12 |
13 | from dotenv import load_dotenv
14 | import numpy as np
15 |
16 | from openai import OpenAI
17 | from faster_whisper import WhisperModel
18 | from pydub import AudioSegment
19 | from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip
20 |
21 | from utils import apply_shake, get_subclip_volume_segment
22 |
23 |
24 | CACHE_SUFFIX = "_segments.json"
25 | DEFAULT_FPS = 24 # fallback framerate if clip.fps is missing
26 |
27 | load_dotenv()
28 | OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4.1")  # OpenAI model IDs are lowercase
29 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
30 | OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
31 | WHISPER_MODEL_SIZE = os.getenv("WHISPER_MODEL_SIZE", "large-v3") # can be adjusted
32 |
33 | # Validate essential environment variables early
34 | if not OPENAI_API_KEY:
35 | raise RuntimeError("Missing OPENAI_API_KEY in environment variables.")
36 |
37 | # Initialize OpenAI client
38 | _client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)
39 |
40 | # Set up logging
41 | logging.basicConfig(
42 | level=logging.INFO,
43 | format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
44 | datefmt="%Y-%m-%d %H:%M:%S",
45 | )
46 | logger = logging.getLogger(__name__)
47 |
48 |
49 | class SegmentData:
50 | """
51 | Simple container for a single transcript segment's metadata.
52 | """
53 |
54 | def __init__(self, start: float, end: float, emotion: str, volume: float):
55 | self.start = start
56 | self.end = end
57 | self.emotion = emotion
58 | self.volume = volume
59 |
60 | def to_dict(self) -> Dict[str, Any]:
61 | """
62 | Transform SegmentData to Dict
63 | """
64 | return {
65 | "start": self.start,
66 | "end": self.end,
67 | "emotion": self.emotion,
68 | "volume": self.volume,
69 | }
70 |
71 | @staticmethod
72 | def from_dict(data: Dict[str, Any]) -> "SegmentData":
73 | """
74 | Transform dict to SegmentData
75 | """
76 | return SegmentData(
77 | start=data["start"],
78 | end=data["end"],
79 | emotion=data["emotion"],
80 | volume=data["volume"],
81 | )
82 |
83 |
84 | def build_emotion_system_prompt(emotion_keys: List[str]) -> str:
85 | """
86 | Construct the system prompt for ChatGPT to classify emotions.
87 | """
88 | labels = ", ".join(emotion_keys)
89 | return (
90 | "You are an emotion classifier. "
91 | "Given a short phrase in any language, reply with exactly one of the following labels: "
92 | f"{labels}. "
93 | "Respond with just the label, no extra text. Try to be expressive."
94 | )
95 |
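# For example, build_emotion_system_prompt(["happy", "sad", "angry"]) returns:
#   "You are an emotion classifier. Given a short phrase in any language, reply
#    with exactly one of the following labels: happy, sad, angry. Respond with
#    just the label, no extra text. Try to be expressive."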
96 |
97 | def classify_emotion(text: str, emotion_map: Dict[str, str]) -> str:
98 | """
99 | Use ChatGPT to classify the given text into one of the keys in emotion_map.
100 | If anything goes wrong or the returned label is unexpected, fallback to the first emotion key.
101 |
102 | Args:
103 | text (str): The text segment to classify.
104 | emotion_map (Dict[str, str]): Mapping from emotion label -> avatar path.
105 |
106 | Returns:
107 | str: One of the keys from emotion_map (lowercased match).
108 | """
109 | emotion_keys = list(emotion_map.keys())
110 | default_emotion = emotion_keys[0]
111 |
112 | prompt = build_emotion_system_prompt(emotion_keys)
113 | logger.debug("Emotion classification prompt: %s", prompt)
114 | logger.debug("User text for classification: %s", text)
115 |
116 | try:
117 | response = _client.chat.completions.create(
118 | model=OPENAI_MODEL,
119 | messages=[
120 | {"role": "system", "content": prompt},
121 | {"role": "user", "content": text},
122 | ],
123 | )
124 | raw_label = response.choices[0].message.content.strip().lower()
125 | logger.debug("Raw emotion label from GPT: %s", raw_label)
126 | # Attempt to match one of the known keys
127 | for key in emotion_keys:
128 | if key.lower() in raw_label:
129 | logger.info("Classified emotion '%s' for text segment.", key)
130 | return key
131 | # No direct match: fallback
132 | logger.warning(
133 | "Unexpected label '%s'. Falling back to default '%s'.",
134 | raw_label,
135 | default_emotion,
136 | )
137 | return default_emotion
138 |
139 | except Exception as e:
140 | logger.error(
141 | "Error calling ChatGPT for emotion classification: %s. Using default '%s'.",
142 | e,
143 | default_emotion,
144 | )
145 | return default_emotion
146 |
147 |
148 | def compute_segment_volume(audio: AudioSegment, start: float, end: float) -> float:
149 | """
150 | Compute the average loudness/volume of a subclip using pydub.
151 | Delegates to get_subclip_volume_segment helper.
152 |
153 | Args:
154 | audio (AudioSegment): Full audio loaded via pydub.
155 | start (float): Start time in seconds.
156 | end (float): End time in seconds.
157 |
158 | Returns:
159 | float: A volume metric (higher means louder).
160 | """
161 | duration = end - start
162 | try:
163 | volume_value = get_subclip_volume_segment(audio, start, duration)
164 | logger.debug(
165 | "Computed volume %.4f for segment [%.2f, %.2f].", volume_value, start, end
166 | )
167 | return volume_value
168 | except Exception as e:
169 | logger.error(
170 | "Error computing volume for segment [%.2f, %.2f]: %s. Defaulting to 0.0.",
171 | start,
172 | end,
173 | e,
174 | )
175 | return 0.0
176 |
177 |
178 | def process_transcript_segment(
179 | seg: Any, pydub_audio: AudioSegment, emotion_map: Dict[str, str]
180 | ) -> SegmentData:
181 | """
182 | Given a Whisper transcript segment (with .start, .end, .text),
183 | classify emotion and measure volume.
184 |
185 | Args:
186 | seg (Any): A segment object returned by Whisper, expected to have .start, .end, .text.
187 | pydub_audio (AudioSegment): The full audio loaded so we can measure volume.
188 | emotion_map (Dict[str, str]): Mapping of emotion label -> avatar file path.
189 |
190 | Returns:
191 | SegmentData: A container with start, end, chosen emotion, and volume.
192 | """
193 | start = seg.start
194 | end = seg.end
195 | text = seg.text.strip()
196 |
197 | logger.debug("Processing segment from %.2f to %.2f: '%s'.", start, end, text)
198 |
199 | # Classify the emotion using ChatGPT
200 | chosen_emotion = classify_emotion(text, emotion_map)
201 |
202 | # Measure volume for this segment
203 | volume = compute_segment_volume(pydub_audio, start, end)
204 |
205 | logger.info(
206 | "Segment [%.2f-%.2f] | Text: '%s' | Emotion: '%s' | Volume: %.4f",
207 | start,
208 | end,
209 | text,
210 | chosen_emotion,
211 | volume,
212 | )
213 |
214 | return SegmentData(start=start, end=end, emotion=chosen_emotion, volume=volume)
215 |
216 |
217 | def get_cache_path(audio_path: Path) -> Path:
218 | """
219 | Given an audio file path, return the corresponding JSON cache path.
220 | """
221 | return audio_path.with_name(audio_path.stem + CACHE_SUFFIX)
222 |
223 |
224 | def load_cached_segments(cache_path: Path) -> Optional[Tuple[List[SegmentData], float]]:
225 | """
226 | If the cache JSON exists, load and return the segments list and global average volume.
227 |
228 | Returns:
229 | Tuple[List[SegmentData], float] or None if cache is missing or invalid.
230 | """
231 | if not cache_path.exists():
232 | logger.info("No cache file found at '%s'. Will generate segments.", cache_path)
233 | return None
234 |
235 | try:
236 | with cache_path.open("r", encoding="utf-8") as f:
237 | data = json.load(f)
238 | raw_segments = data.get("segments", [])
239 | avg_volume = float(data.get("global_avg_volume", 0.0))
240 | segments = [SegmentData.from_dict(item) for item in raw_segments]
241 | logger.info(
242 | "Loaded %d segments and global_avg_volume=%.4f from cache.",
243 | len(segments),
244 | avg_volume,
245 | )
246 | return segments, avg_volume
247 | except Exception as e:
248 | logger.error(
249 | "Failed to load cache from '%s': %s. Ignoring cache.", cache_path, e
250 | )
251 | return None
252 |
253 |
254 | def save_cached_segments(
255 | cache_path: Path, segments: List[SegmentData], global_avg_volume: float
256 | ) -> None:
257 | """
258 | Save the list of segments (converted to dicts) and global_avg_volume to the JSON cache.
259 |
260 | Args:
261 | cache_path (Path): Where to write the cache file.
262 | segments (List[SegmentData]): The computed segments data.
263 | global_avg_volume (float): The average volume across segments.
264 | """
265 | try:
266 | cache_data = {
267 | "segments": [seg.to_dict() for seg in segments],
268 | "global_avg_volume": global_avg_volume,
269 | }
270 | with cache_path.open("w", encoding="utf-8") as f:
271 | json.dump(cache_data, f, ensure_ascii=False, indent=2)
272 | logger.info("Saved segments to cache at '%s'.", cache_path)
273 | except Exception as e:
274 | logger.error("Failed to save cache to '%s': %s", cache_path, e)
275 |
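# Illustrative cache layout written by save_cached_segments (emotion labels and
# numeric values below are made up; the keys mirror SegmentData.to_dict()):
# {
#   "segments": [
#     {"start": 0.0, "end": 2.4, "emotion": "happy", "volume": 0.0312},
#     {"start": 2.4, "end": 5.1, "emotion": "neutral", "volume": 0.0275}
#   ],
#   "global_avg_volume": 0.0294
# }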
276 |
277 | def transcribe_audio_whisper(audio_path: Path, model_size: str) -> List[Any]:
278 | """
279 | Use faster_whisper.WhisperModel to transcribe the audio file into a list of segments.
280 |
281 | Args:
282 | audio_path (Path): Path to the audio or video file.
283 | model_size (str): Model size for Whisper (e.g. "tiny", "base", "small", etc.).
284 |
285 | Returns:
286 | List[Any]: A list of transcript segment objects (each having .start, .end, .text).
287 | """
288 | logger.info("Loading Whisper model (size='%s') for transcription...", model_size)
289 | whisper_model = WhisperModel(model_size, num_workers=4, compute_type="int8")
290 |     try:
291 |         segments, _ = whisper_model.transcribe(str(audio_path), multilingual=True)
292 |         segments = list(segments)  # materialize the lazy generator before the model is freed
293 |         logger.info("Transcription complete. Obtained %d segments.", len(segments))
294 |         return segments
295 |     finally:
296 |         del whisper_model  # ensure WhisperModel resources are freed immediately
297 |
298 |
299 | def classify_and_measure_all(
300 | transcript_segments: List[Any],
301 | pydub_audio: AudioSegment,
302 | emotion_map: Dict[str, str],
303 | max_workers: Optional[int] = None,
304 | ) -> List[SegmentData]:
305 | """
306 | In parallel, classify emotion and measure volume for each Whisper transcript segment.
307 |
308 | Args:
309 | transcript_segments (List[Any]): List of Whisper transcript objects.
310 | pydub_audio (AudioSegment): Full audio for volume computation.
311 | emotion_map (Dict[str, str]): Mapping from emotion key -> avatar path.
312 | max_workers (Optional[int]): Number of threads for parallel execution.
313 |
314 | Returns:
315 | List[SegmentData]: Ordered list of computed SegmentData.
316 | """
317 |     logger.info("Starting parallel processing of transcript segments...")
318 | segments: List[SegmentData] = []
319 |
320 | with ThreadPoolExecutor(max_workers=max_workers) as executor:
321 | futures = {
322 | executor.submit(
323 | process_transcript_segment, seg, pydub_audio, emotion_map
324 | ): seg
325 | for seg in transcript_segments
326 | }
327 | for future in as_completed(futures):
328 | try:
329 | seg_data = future.result()
330 | segments.append(seg_data)
331 | except Exception as e:
332 | # If one segment fails, log the error but continue
333 | seg_obj = futures[future]
334 | logger.error(
335 | "Segment [%.2f-%.2f] processing failed: %s",
336 | seg_obj.start,
337 | seg_obj.end,
338 | e,
339 | )
340 |
341 | # Sort by start time, just in case
342 | segments.sort(key=lambda s: s.start)
343 | logger.info("Completed classification & volume measurement for all segments.")
344 | return segments
345 |
346 |
347 | def append_tail_segment_if_needed(
348 | segments: List[SegmentData],
349 | total_duration: float,
350 | default_emotion: str,
351 | global_avg_volume: float,
352 | ) -> List[SegmentData]:
353 | """
354 | If there is a gap at the end of the audio not covered by any segment,
355 | append a "tail" segment from the last segment end to total_duration,
356 | using default_emotion and the global average volume.
357 |
358 | Args:
359 | segments (List[SegmentData]): Current list of processed segments (sorted).
360 | total_duration (float): Total length of the audio in seconds.
361 | default_emotion (str): The fallback emotion key.
362 | global_avg_volume (float): Average volume across all existing segments.
363 |
364 | Returns:
365 | List[SegmentData]: The new list with an extra tail segment if needed.
366 | """
367 | if not segments:
368 | # No segments at all: create a single segment from 0 to total_duration
369 | logger.warning(
370 | "No transcript segments found. Generating single tail segment "
371 | "from 0 to %.2f with emotion '%s'.",
372 | total_duration,
373 | default_emotion,
374 | )
375 | return [
376 | SegmentData(
377 | start=0.0,
378 | end=total_duration,
379 | emotion=default_emotion,
380 | volume=global_avg_volume,
381 | )
382 | ]
383 |
384 | last_end = segments[-1].end
385 | if last_end < total_duration:
386 | logger.info(
387 | "Audio extends from %.2f to %.2f beyond last segment end. "
388 | "Adding tail segment with default emotion '%s'.",
389 | last_end,
390 | total_duration,
391 | default_emotion,
392 | )
393 | tail_segment = SegmentData(
394 | start=last_end,
395 | end=total_duration,
396 | emotion=default_emotion,
397 | volume=global_avg_volume,
398 | )
399 | return segments + [tail_segment]
400 |
401 | logger.debug("No tail segment needed; segments already cover full audio.")
402 | return segments
403 |
404 |
405 | def generate_segment_data(
406 | audio_path: Path,
407 | emotion_map: Dict[str, str],
408 | max_workers: Optional[int] = None,
409 | ) -> Tuple[List[SegmentData], float]:
410 | """
411 | Main orchestration function: generate (or load from cache) the list of SegmentData
412 | dictionaries, each containing start, end, emotion, and volume.
413 | Also return the global average volume.
414 |
415 | Steps:
416 | 1. Check if cache exists. If so, load and return cached data.
417 | 2. Otherwise:
418 | a. Load audio via pydub for volume measurement.
419 | b. Transcribe via WhisperModel.
420 | c. In parallel, process each segment to classify emotion and measure volume.
421 | d. Compute global average volume.
422 | e. Append a tail segment if total segment durations < full audio duration.
423 | f. Save everything to cache JSON and return.
424 |
425 | Args:
426 | audio_path (Path): Path to the audio file (or video file with audio).
427 | emotion_map (Dict[str, str]): Mapping from emotion key -> avatar path.
428 | max_workers (Optional[int]): Number of parallel threads.
429 |
430 | Returns:
431 | Tuple[List[SegmentData], float]: (List of SegmentData, global average volume).
432 | """
433 | cache_path = get_cache_path(audio_path)
434 | cached = load_cached_segments(cache_path)
435 | if cached:
436 | return cached # (segments, global_avg_volume)
437 |
438 | # 2.a. Load full audio via pydub for volume measurement
439 | logger.info("Loading full audio via pydub from '%s'...", audio_path)
440 | try:
441 | pydub_audio = AudioSegment.from_file(str(audio_path))
442 | except Exception as e:
443 | logger.error(
444 | "Failed to load audio with pydub: %s. Aborting segment generation.", e
445 | )
446 | return [], 0.0
447 |
448 | # 2.b. Transcribe via Whisper
449 | transcript_segments = transcribe_audio_whisper(audio_path, WHISPER_MODEL_SIZE)
450 |
451 | # 2.c. Parallel classification + volume
452 | segments = classify_and_measure_all(
453 | transcript_segments, pydub_audio, emotion_map, max_workers
454 | )
455 |
456 | # 2.d. Compute global average volume
457 | volumes = [seg.volume for seg in segments]
458 | global_avg_volume = float(np.mean(volumes)) if volumes else 0.0
459 | logger.info("Global average volume computed: %.4f", global_avg_volume)
460 |
461 | # 2.e. If the transcription times do not cover the entire audio, append a tail
462 | # First we need total audio duration; we can get it from pydub_audio.duration_seconds
463 | total_duration = pydub_audio.duration_seconds
464 | default_emotion = list(emotion_map.keys())[0]
465 | segments = append_tail_segment_if_needed(
466 | segments, total_duration, default_emotion, global_avg_volume
467 | )
468 |
469 | # 2.f. Save to cache
470 | save_cached_segments(cache_path, segments, global_avg_volume)
471 |
472 | return segments, global_avg_volume
473 |
474 |
475 | def load_avatar_clips(avatar_map: Dict[str, str]) -> Dict[str, VideoFileClip]:
476 | """
477 | Given a mapping from emotion key -> avatar file path, load each avatar as a
478 | VideoFileClip (without audio).
479 | If a path does not exist, log a warning and skip that key.
480 |
481 | Args:
482 | avatar_map (Dict[str, str]): Mapping of emotion key -> avatar file path.
483 |
484 | Returns:
485 | Dict[str, VideoFileClip]: Only keys whose path existed and loaded successfully.
486 | """
487 | loaded_clips: Dict[str, VideoFileClip] = {}
488 | for emotion, path_str in avatar_map.items():
489 | path_obj = Path(path_str)
490 | if not path_obj.exists():
491 | logger.warning(
492 | "Avatar file for emotion '%s' not found at '%s'. Skipping.",
493 | emotion,
494 | path_str,
495 | )
496 | continue
497 | try:
498 | clip = VideoFileClip(str(path_obj)).without_audio()
499 | loaded_clips[emotion] = clip
500 | logger.info(
501 | "Preloaded avatar clip for emotion '%s' from '%s'.", emotion, path_str
502 | )
503 | except Exception as e:
504 | logger.error(
505 | "Failed to load avatar '%s' at '%s': %s. Skipping.",
506 | emotion,
507 | path_str,
508 | e,
509 | )
510 | return loaded_clips
511 |
512 |
513 | def build_avatar_subclips(
514 | segments: List[SegmentData],
515 | default_clip: VideoFileClip,
516 | preloaded_clips: Dict[str, VideoFileClip],
517 | global_avg_volume: float,
518 | shake_factor: float,
519 | ) -> List[VideoFileClip]:
520 | """
521 | For each segment, create a looped (and shaken) avatar subclip at the correct timestamp.
522 | Also fill any gaps with the default avatar loop.
523 |
524 | Args:
525 | segments (List[SegmentData]): Sorted list of segment data.
526 | default_clip (VideoFileClip): The fallback avatar clip (first emotion).
527 | preloaded_clips (Dict[str, VideoFileClip]): Mapping of emotion key -> VideoFileClip.
528 | global_avg_volume (float): Average volume across all segments.
529 | shake_factor (float): Factor controlling shake intensity relative to volume.
530 |
531 | Returns:
532 | List[VideoFileClip]: All prepared subclips positioned in time.
533 | """
534 | subclips: List[VideoFileClip] = []
535 | prev_end = 0.0
536 |
537 | # Precompute default_fps and store it
538 | default_fps = getattr(default_clip, "fps", DEFAULT_FPS) or DEFAULT_FPS
539 |
540 | for seg in segments:
541 | start, end, emotion, volume = seg.start, seg.end, seg.emotion, seg.volume
542 | duration = end - start
543 |
544 | # 1) If there is a gap between prev_end and this segment's start, fill with default avatar
545 | if start > prev_end:
546 | gap_duration = start - prev_end
547 | logger.debug(
548 | "Filling gap [%.2f-%.2f] with default avatar clip.", prev_end, start
549 | )
550 | looped_default = (
551 | default_clip.loop(duration=gap_duration)
552 | .set_duration(gap_duration)
553 | .set_fps(default_fps)
554 | .set_start(prev_end)
555 | )
556 | subclips.append(looped_default)
557 |
558 | # 2) For this segment, pick the correct avatar (or fallback to default if missing)
559 | if emotion not in preloaded_clips:
560 | logger.warning(
561 | "No preloaded avatar for emotion '%s'. Using default instead.", emotion
562 | )
563 | base_clip = default_clip
564 | else:
565 | base_clip = preloaded_clips[emotion]
566 |
567 | # Precompute fps for this base clip
568 | base_fps = getattr(base_clip, "fps", DEFAULT_FPS) or DEFAULT_FPS
569 |
570 | # Loop the avatar clip to exactly match segment duration
571 | avatar_loop = (
572 | base_clip.loop(duration=duration).set_duration(duration).set_fps(base_fps)
573 | )
574 |
575 | # Compute shake intensity (0 if global_avg_volume is zero)
576 | if global_avg_volume > 0:
577 | intensity = (volume / global_avg_volume) * shake_factor
578 | else:
579 | intensity = 0.0
580 |
581 | logger.debug(
582 | "Applying shake to emotion '%s' clip. Volume=%.4f, GlobalAvg=%.4f, ShakeIntensity=%.4f",
583 | emotion,
584 | volume,
585 | global_avg_volume,
586 | intensity,
587 | )
588 | shaken_clip = (
589 | apply_shake(avatar_loop, intensity)
590 | .set_duration(duration)
591 | .set_fps(base_fps)
592 | .set_start(start)
593 | )
594 |
595 | subclips.append(shaken_clip)
596 | prev_end = end
597 |
598 |     # 3) Any leftover gap after the last segment (audio that extends beyond
599 |     #    segments[-1].end) is intentionally not filled here: this function does
600 |     #    not know the real full audio duration, only the segment boundaries.
601 |     #    The caller, create_avatar_video_from_audio, checks the audio track's
602 |     #    duration and appends a final default-avatar loop to cover that tail,
603 |     #    so no extra handling is needed at this point.
605 |
606 | return subclips
607 |
608 |
609 | def create_avatar_video_from_audio(
610 | audio_path_str: str,
611 | config: Dict[str, Any],
612 | max_workers: Optional[int] = None,
613 | ) -> None:
614 | """
615 | High-level function to generate the avatar video:
616 | 1. Load audio (video or audio file).
617 | 2. Generate or load segment data (transcription, emotion, volume).
618 | 3. Preload avatar clips.
619 | 4. Build a list of timed subclips (avatar loops and default gaps).
620 | 5. Composite all subclips and attach the original audio.
621 | 6. Export the final video as 'output_video.mp4'.
622 |
623 | Args:
624 | audio_path_str (str): Path to the input audio or video file.
625 | config (Dict[str, Any]): A configuration dictionary that must contain:
626 | - 'avatars': Dict[str, str] mapping emotion keys -> avatar file paths.
627 | - 'shake_factor': float representing maximum shake intensity scale.
628 | max_workers (Optional[int]): Number of threads to use for segment processing.
629 | """
630 | audio_path = Path(audio_path_str)
631 | logger.info("Starting avatar video generation for '%s'.", audio_path)
632 |
633 | # 1. Load the audio clip (attempt VideoFileClip first, else AudioFileClip)
634 | try:
635 | video_reader = VideoFileClip(str(audio_path))
636 | audio_clip = video_reader.audio
637 | logger.info(
638 | "Extracted audio from video '%s'. Duration=%.2f sec.",
639 | audio_path,
640 | audio_clip.duration,
641 | )
642 | except Exception:
643 | logger.info("Input is not a video or failed to extract. Loading as pure audio.")
644 | try:
645 | audio_clip = AudioFileClip(str(audio_path))
646 | logger.info(
647 | "Loaded audio-only file '%s'. Duration=%.2f sec.",
648 | audio_path,
649 | audio_clip.duration,
650 | )
651 | except Exception as e:
652 | logger.error("Failed to load '%s' as audio: %s. Aborting.", audio_path, e)
653 | return
654 |
655 | total_duration = audio_clip.duration
656 |
657 | # 2. Build segments (load from cache or generate new)
658 | emotion_map: Dict[str, str] = config.get("avatars", {})
659 | if not emotion_map:
660 | logger.error("No 'avatars' mapping provided in config. Cannot proceed.")
661 | return
662 |
663 | segments, global_avg_volume = generate_segment_data(
664 | audio_path, emotion_map, max_workers
665 | )
666 | if not segments:
667 | logger.error("No segments generated. Aborting video creation.")
668 | return
669 |
670 | # 3. Preload avatar clips
671 | preloaded_clips = load_avatar_clips(emotion_map)
672 | default_emotion = list(emotion_map.keys())[0]
673 | if default_emotion not in preloaded_clips:
674 | logger.error(
675 | "Default emotion '%s' avatar not preloaded. Aborting video creation.",
676 | default_emotion,
677 | )
678 | return
679 |
680 | default_clip = preloaded_clips[default_emotion]
681 | shake_factor = config.get("shake_factor", 0.1)
682 |
683 | # 4. Build all subclips
684 |     logger.info("Building avatar subclips for %d segments...", len(segments))
685 | subclips = build_avatar_subclips(
686 | segments, default_clip, preloaded_clips, global_avg_volume, shake_factor
687 | )
688 |
689 | # 4.a. Check if final tail clip needed (if last segment end < total_duration)
690 | last_end_time = segments[-1].end
691 | if last_end_time < total_duration:
692 | gap = total_duration - last_end_time
693 | default_fps = getattr(default_clip, "fps", DEFAULT_FPS) or DEFAULT_FPS
694 | logger.info(
695 | "Adding final default avatar loop to cover gap [%.2f-%.2f].",
696 | last_end_time,
697 | total_duration,
698 | )
699 | final_tail = (
700 | default_clip.loop(duration=gap)
701 | .set_duration(gap)
702 | .set_fps(default_fps)
703 | .set_start(last_end_time)
704 | )
705 | subclips.append(final_tail)
706 |
707 | # 5. Composite all subclips into one video, sized as the default clip
708 | width, height = default_clip.w, default_clip.h
709 | logger.info(
710 | "Compositing %d subclips into final video of size (%d x %d).",
711 | len(subclips),
712 | width,
713 | height,
714 | )
715 | final_video = CompositeVideoClip(subclips, size=(width, height))
716 | final_video = final_video.set_audio(audio_clip).set_duration(total_duration)
717 |
718 | # 6. Export the final video
719 | output_path = Path("output_video.mp4")
720 | try:
721 | logger.info("Writing final video to '%s'...", output_path)
722 | final_video.write_videofile(
723 | str(output_path),
724 | codec="libx264",
725 | audio_codec="aac",
726 | fps=DEFAULT_FPS,
727 | preset="medium",
728 | verbose=False,
729 | logger=None,
730 | )
731 | logger.info("Successfully saved avatar video as '%s'.", output_path)
732 | except Exception as e:
733 | logger.error("Failed to write final video '%s': %s", output_path, e)
734 | finally:
735 | # 7. Release resources: close all loaded clips
736 | logger.info("Releasing resources for avatar clips and final video.")
737 | for clip in preloaded_clips.values():
738 | try:
739 | clip.close()
740 | except Exception as e:
741 | logger.warning("Error closing avatar clip: %s", e)
742 | try:
743 | final_video.close()
744 | except Exception:
745 | pass
746 | try:
747 | audio_clip.close()
748 | except Exception:
749 | pass
750 | if "video_reader" in locals():
751 | try:
752 | video_reader.close()
753 | except Exception:
754 | pass
755 |
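# ---------------------------------------------------------------------------
# Example usage (illustrative sketch only; the input path, avatar filenames and
# worker count below are assumptions, not part of the module itself). In the
# real project the configuration is expected to come from config.json via the
# config loader, and this module is driven from main.py.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    example_config = {
        # Mapping: emotion label -> avatar video path. The first key doubles as
        # the default/fallback emotion.
        "avatars": {
            "neutral": "avatars/neutral.mp4",
            "happy": "avatars/happy.mp4",
            "sad": "avatars/sad.mp4",
        },
        # Scales how strongly the avatar shakes relative to segment loudness
        # (0.1 matches the fallback used in create_avatar_video_from_audio).
        "shake_factor": 0.1,
    }
    # Writes the result to 'output_video.mp4' in the current directory.
    create_avatar_video_from_audio("input_audio.mp3", example_config, max_workers=4)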
--------------------------------------------------------------------------------