├── .gitattributes
├── .github
    └── workflows
    │   └── sync_to_huggingface_space.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.py
├── assets
    └── demo.gif
├── download_video.py
├── environment.yml
├── packages.txt
└── requirements.txt


/.gitattributes:
--------------------------------------------------------------------------------
1 | assets/demo.gif filter=lfs diff=lfs merge=lfs -text
2 | assets/demo.m4v filter=lfs diff=lfs merge=lfs -text
3 | 


--------------------------------------------------------------------------------
/.github/workflows/sync_to_huggingface_space.yml:
--------------------------------------------------------------------------------
 1 | name: Sync to Hugging Face hub
 2 | on:
 3 |   push:
 4 |     branches: [main]
 5 | 
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   sync-to-hub:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |         with:
14 |           fetch-depth: 0
15 |           lfs: true
16 |       - name: Set git committer identity
17 |         run: |
18 |           git config --global user.email "danilotpnta@gmail.com"
19 |           git config --global user.name "danilotpnta"
20 |       - name: Pull latest changes from Hugging Face with rebase
21 |         env:
22 |           HF_TOKEN: ${{ secrets.HF_TOKEN }}
23 |         run: |
24 |           git pull --rebase https://huggingface.co/spaces/danilotpnta/Youtube-Whisper main
25 |       - name: Push to hub
26 |         env:
27 |           HF_TOKEN: ${{ secrets.HF_TOKEN }}
28 |         run: git push https://danilotpnta:$HF_TOKEN@huggingface.co/spaces/danilotpnta/Youtube-Whisper main
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 
164 | 
165 | *.mp3
166 | .DS_Store
167 | *.mp4
168 | *.m4v
169 | thumbnail.jpg
170 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use an official Python runtime as a parent image
 2 | FROM python:3.9-slim
 3 | 
 4 | # Set the working directory in the container
 5 | WORKDIR /app
 6 | 
 7 | # Copy the current directory contents into the container at /app
 8 | COPY . /app
 9 | 
10 | # Install necessary dependencies including ffmpeg
11 | RUN apt-get update && apt-get install -y \
12 |     wget \
13 |     curl \
14 |     unzip \
15 |     git \
16 |     chromium \
17 |     chromium-driver \
18 |     ffmpeg \
19 |     && rm -rf /var/lib/apt/lists/*
20 | 
21 | # Install pip and the required Python packages
22 | RUN pip install --upgrade pip \
23 |     && pip install selenium requests gradio \
24 |     && pip install git+https://github.com/openai/whisper.git
25 | 
26 | # Set environment variables for Selenium
27 | ENV CHROME_BIN=/usr/bin/chromium
28 | ENV CHROMEDRIVER_BIN=/usr/bin/chromedriver
29 | 
30 | # Expose the port the app will run on
31 | EXPOSE 7860
32 | 
33 | # Command to run the Gradio app
34 | CMD ["python", "app.py"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Danilo Toapanta
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Youtube Whisper
 3 | emoji: 🐢
 4 | colorFrom: purple
 5 | colorTo: pink
 6 | sdk: docker
 7 | sdk_version: 4.44.0
 8 | app_file: app.py
 9 | pinned: false
10 | license: mit
11 | ---
12 | 
13 | # Youtube-Whisper
14 | A simple Gradio app that transcribes YouTube videos by extracting audio and using OpenAI’s Whisper model for transcription. Paste a YouTube link and get the video’s audio transcribed into text.
15 | 
16 | ![Demo](assets/demo.gif)
17 | 
18 | ## Requirements
19 | 
20 | - Conda installed (for managing environments)
21 | - Python 3.9 or above
22 | - **FFmpeg** installed (required for audio conversion)
23 | 
24 | ## Installation
25 | 
26 | ### Step 1: Clone the Repository
27 | 
28 | ```bash
29 | git clone https://github.com/danilotpnta/Youtube-Whisper.git
30 | cd Youtube-Whisper
31 | ```
32 | 
33 | ### Step 2: Install FFmpeg
34 | 
35 | You need FFmpeg for processing the audio. Install it based on your operating system:
36 | 
37 | - **macOS**: Install FFmpeg via Homebrew:
38 |   ```bash
39 |   brew install ffmpeg
40 |   ```
41 | 
42 | - **Ubuntu/Linux**: Install FFmpeg via apt:
43 |   ```bash
44 |   sudo apt update
45 |   sudo apt install ffmpeg
46 |   ```
47 | 
48 | - **Windows**: 
49 |   - Download FFmpeg from the official website: [FFmpeg Download](https://ffmpeg.org/download.html).
50 |   - Extract the files and add the `bin` folder to your system’s PATH environment variable. For detailed instructions on adding FFmpeg to PATH, you can follow [this guide](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/).
51 | 
52 | Verify the installation by running:
53 | ```bash
54 | ffmpeg -version
55 | ```
56 | 
57 | ### Step 3: Create and Activate the Conda Environment
58 | 
59 | To set up the environment using the provided `environment.yml` file:
60 | 
61 | ```bash
62 | conda env create -f environment.yml
63 | ```
64 | 
65 | Once the environment is created, activate it with:
66 | 
67 | ```bash
68 | conda activate yt-whisper
69 | ```
70 | 
71 | ### Step 4: Run the App
72 | 
73 | Once the environment is active, you can launch the Gradio app with:
74 | 
75 | ```bash
76 | python app.py
77 | ```
78 | 
79 | This will start a local server for the app, and you can access it by visiting the URL printed in the terminal (usually `http://localhost:7860/`).
80 | 
81 | ### Troubleshooting
82 | 
83 | 1. **FFmpeg Not Found**: 
84 |    If you see an error related to `ffmpeg not found`, ensure FFmpeg is installed and added to your system's PATH. You can also point `yt-dlp` at the FFmpeg binary explicitly by adding an `ffmpeg_location` entry to the `ydl_opts` dictionary in `download_video.py`.
85 | 
86 | 2. **Download (`yt-dlp`) Errors**:
87 |    This project downloads audio with `yt-dlp` rather than `pytube`. If a download fails, make sure `yt-dlp` is up to date (`pip install --upgrade yt-dlp`) and that your URL is correctly formatted.
88 | 
89 | 3. **Update Dependencies**:
90 |    Ensure that `pip` and `conda` are up to date:
91 |    ```bash
92 |    conda update conda
93 |    pip install --upgrade pip
94 |    ```
95 | 
96 | ## License
97 | 
98 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
99 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import whisper
 3 | import gradio as gr
 4 | from download_video import download_mp3_yt_dlp 
 5 | 
 6 | import warnings
 7 | warnings.filterwarnings("ignore", category=FutureWarning, module="torch")
 8 | 
 9 | # Function to download the audio, title, and thumbnail from YouTube
def download_video_info(url):
    """Download a video's audio track and return its path and metadata.

    Args:
        url: The YouTube URL handed to ``download_mp3_yt_dlp``.

    Returns:
        tuple: ``(audio_file, title, thumbnail_url)``. ``audio_file`` is the
        hard-coded path ``"downloaded_video.mp3"``. If the download raises,
        the result is ``(None, None, None)`` so callers can always unpack
        exactly three values.
    """
    try:
        # Call the function to download video and get title, thumbnail
        title, thumbnail_url = download_mp3_yt_dlp(url)
    except Exception as e:
        # The failure path must have the same arity as the success path:
        # callers unpack three values, so a 4-tuple here would turn every
        # download error into a ValueError. The reason is logged instead.
        print(f"Error downloading video info: {e}")
        return None, None, None

    audio_file = "downloaded_video.mp3"  # Path to the downloaded audio (MP3)
    return audio_file, title, thumbnail_url
19 | 
20 | # Function to transcribe the downloaded audio using Whisper
def transcribe_audio(audio_path, model_size="base", language="en"):
    """Transcribe an audio file with Whisper and return the recognised text.

    Args:
        audio_path: Path of the audio file passed to the model's ``transcribe``.
        model_size: Model name passed to ``whisper.load_model``.
        language: Language code forwarded to ``transcribe``.

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    # The model is loaded on every call, using whichever size was requested.
    return whisper.load_model(model_size).transcribe(
        audio_path, language=language
    )['text']
25 | 
26 | # Split logic: First fetch title and thumbnail, then transcribe
def get_video_info_and_transcribe(youtube_url, model_size="base", language="en"):
    """Download a YouTube video's audio, then transcribe it.

    Args:
        youtube_url: URL passed to ``download_video_info``.
        model_size: Whisper model name forwarded to ``transcribe_audio``.
        language: Language code forwarded to ``transcribe_audio``.

    Returns:
        tuple: Three values, in order, for the title, thumbnail and
        transcription outputs. When the audio could not be fetched, the
        title slot carries the message "Error fetching video." and the other
        two are ``None``.
    """
    # Only the first three fields are used. Slicing keeps this working even
    # if the helper reports a failure with an extra trailing error element.
    audio_path, title, thumbnail_url = download_video_info(youtube_url)[:3]

    # If fetching video info fails
    if not audio_path or not os.path.exists(audio_path):
        return gr.update(value="Error fetching video."), None, None

    title_output = gr.update(value=title)

    # Show the thumbnail if available. visible=True is set explicitly so the
    # image reappears after an earlier run without a thumbnail hid it.
    if thumbnail_url:
        thumbnail_output = gr.update(value=thumbnail_url, visible=True)
    else:
        thumbnail_output = gr.update(visible=False)  # Hide if no thumbnail

    # Start transcription
    transcription = transcribe_audio(audio_path, model_size, language)

    return title_output, thumbnail_output, gr.update(value=transcription)
48 | 
49 | # Gradio interface setup using gradio.components
# Build the Gradio UI: page header, description, input row, output widgets,
# and the button that triggers download + transcription.
with gr.Blocks() as demo:

    # Page header rendered as raw HTML.
    title = "<center><h1>YouTube Whisper ⚡️ </h1></center>"
    gr.HTML(title)

    # Introductory text shown under the header.
    gr.Markdown(
    """
    This tool lets you transcribe YouTube videos in multiple languages using **[Whisper](https://openai.com/research/whisper)**, an open-source speech recognition (ASR) model developed by OpenAI.


    ### Key Features:
    - **Fast transcription**: Using the **base** model, transcribing a **3 minute** video takes approximately **30 seconds**.
    - **Multiple language support**: Choose from **English**, **Spanish**, **French**, and more!
    - **Simple workflow**: 
        1. Paste a YouTube link.
        2. Select the model size and language.
        3. Click "Transcribe" to get the text from the video.

    _Transcription times may vary based on model size and video length._
    """)

    # Input row: the URL box uses scale=5 against scale=1 for each dropdown.
    with gr.Row():
        youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5)
        model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1)
        language = gr.Dropdown(choices=["en", "es", "fr", "de", "it", "ja"], label="Language", value="en", scale=1)
    
    # Read-only outputs filled in by get_video_info_and_transcribe.
    title_output = gr.Textbox(label="Video Title", interactive=False)

    with gr.Row():
        thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1)
        transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1)
    
    transcribe_button = gr.Button("Transcribe")

    # The order of `inputs` matches the callback's parameters, and the order
    # of `outputs` matches the three values the callback returns
    # (title, thumbnail, transcription).
    transcribe_button.click(
        get_video_info_and_transcribe, 
        inputs=[youtube_url, model_size, language],
        outputs=[title_output, thumbnail_output, transcription_output]
    )

# Launch the app
# Serves on all network interfaces at port 7860, the port the Dockerfile EXPOSEs.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
93 | 


--------------------------------------------------------------------------------
/assets/demo.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8a41d51a5ada3991d2704711abb0744c24256e1478154f64fc530bd813a652c1
3 | size 1942440
4 | 


--------------------------------------------------------------------------------
/download_video.py:
--------------------------------------------------------------------------------
 1 | import yt_dlp as youtube_dl
 2 | import requests
 3 | 
 4 | def download_mp3_yt_dlp(youtube_url):
 5 |     # Set up yt-dlp options
 6 |     ydl_opts = {
 7 |         'format': 'bestaudio/best',
 8 |         'postprocessors': [{
 9 |             'key': 'FFmpegExtractAudio',
10 |             'preferredcodec': 'mp3',
11 |             'preferredquality': '192',
12 |         }],
13 |         'outtmpl': 'downloaded_video.%(ext)s',
14 |         'quiet': False,
15 |         'no_warnings': True,
16 |         'progress_hooks': [lambda d: print(f"Downloading {d['filename']}: {d['_percent_str']}")],
17 |     }
18 | 
19 |     # Extract video info including title and thumbnail
20 |     with youtube_dl.YoutubeDL() as ydl:
21 |         info_dict = ydl.extract_info(youtube_url, download=False)
22 |         title = info_dict.get('title', 'Unknown Title')
23 |         thumbnail_url = info_dict.get('thumbnail', None)
24 | 
25 |     # Download the MP3 using yt-dlp
26 |     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
27 |         ydl.download([youtube_url])
28 | 
29 |     # Fetch the thumbnail for display
30 |     if thumbnail_url:
31 |         response = requests.get(thumbnail_url)
32 |         if response.status_code == 200:
33 |             with open('thumbnail.jpg', 'wb') as f:
34 |                 f.write(response.content)
35 |             print(f"Thumbnail downloaded successfully.")
36 |         else:
37 |             print(f"Failed to download thumbnail. HTTP Status Code: {response.status_code}")
38 | 
39 |     # Return the title and thumbnail URL
40 |     return title, thumbnail_url
41 | 
42 | # Example usage:
43 | # youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
44 | # title, thumbnail_url = download_mp3_yt_dlp(youtube_url)
45 | # print(f"Title: {title}")
46 | # print(f"Thumbnail: {thumbnail_url}")
47 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
# Conda environment for running the Youtube-Whisper Gradio app locally.
# Create it with `conda env create -f environment.yml`, then activate it with
# `conda activate yt-whisper`.
# NOTE: FFmpeg is not installed by this file; see the README for how to
# install it on your operating system.
name: yt-whisper
channels:
  - defaults
  - conda-forge
dependencies:
  - python=3.9
  - pip
  # The packages below are installed with pip inside the conda environment.
  - pip:
      - requests
      - gradio
      # Whisper is installed directly from its GitHub repository.
      - openai-whisper @ git+https://github.com/openai/whisper.git
      - yt_dlp
      - tqdm


--------------------------------------------------------------------------------
/packages.txt:
--------------------------------------------------------------------------------
1 | chromium-driver


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | gradio
3 | openai-whisper @ git+https://github.com/openai/whisper.git
4 | tqdm
5 | yt_dlp


--------------------------------------------------------------------------------