├── README.md ├── .gitignore ├── LICENSE └── autotranslate.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # **autotranslate** 2 | 3 | Videos Transcription and Translation with Faster Whisper and ChatGPT 4 | 5 | [![notebook shield](https://img.shields.io/static/v1?label=&message=Notebook&color=blue&style=for-the-badge&logo=googlecolab&link=https://colab.research.google.com/github/lewangdev/autotranslate/blob/main/autotranslate.ipynb)](https://colab.research.google.com/github/lewangdev/autotranslate/blob/main/autotranslate.ipynb) 6 | [![repository shield](https://img.shields.io/static/v1?label=&message=Repository&color=blue&style=for-the-badge&logo=github&link=https://github.com/lewangdev/autotranslate)](https://github.com/lewangdev/autotranslate) 7 | 8 | This Notebook will guide you through the transcription and translation of video using [Faster Whisper](https://github.com/SYSTRAN/faster-whisper) and ChatGPT. You'll be able to explore most inference parameters or use the Notebook as-is to store the transcript, translation and video audio in your Google Drive. 
9 | 10 | ## Supported/Tested Platforms 11 | 12 | * [x] Windows 11 Pro/RTX3060 13 | * [x] Colab 14 | 15 | ## Supported Local Video Files 16 | 17 | * [x] Google Drive 18 | 19 | ## Supported Sites 20 | 21 | * [x] YouTube 22 | * [x] Twitter 23 | * [x] Bilibili 24 | * [x] DeepLearning.AI 25 | * [x] [More supported sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /autotranslate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "96kvih9mXkNN" 7 | }, 8 | "source": [ 9 | "# **Videos Transcription and Translation with Faster Whisper and ChatGPT**\n", 10 | "\n", 11 | "\n", 12 | "[![notebook shield](https://img.shields.io/static/v1?label=&message=Notebook&color=blue&style=for-the-badge&logo=googlecolab&link=https://colab.research.google.com/github/lewangdev/autotranslate/blob/main/autotranslate.ipynb)](https://colab.research.google.com/github/lewangdev/autotranslate/blob/main/autotranslate.ipynb)\n", 13 | "[![repository shield](https://img.shields.io/static/v1?label=&message=Repository&color=blue&style=for-the-badge&logo=github&link=https://github.com/lewangdev/autotranslate)](https://github.com/lewangdev/autotranslate)\n", 14 | "\n", 15 | "This Notebook will guide you through the transcription and translation of video using [Faster Whisper](https://github.com/guillaumekln/faster-whisper) 
and ChatGPT. You'll be able to explore most inference parameters or use the Notebook as-is to store the transcript, translation and video audio in your Google Drive." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "cellView": "form", 23 | "id": "QshUbLqpX7L4" 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "#@markdown # **Check GPU type** 🕵️\n", 28 | "\n", 29 | "#@markdown The type of GPU you get assigned in your Colab session defines the speed at which the video will be transcribed.\n", 30 | "#@markdown The higher the number of floating point operations per second (FLOPS), the faster the transcription.\n", 31 | "#@markdown But even the least powerful GPU available in Colab is able to run any Whisper model.\n", 32 | "#@markdown Make sure you've selected `GPU` as hardware accelerator for the Notebook (Runtime → Change runtime type → Hardware accelerator).\n", 33 | "\n", 34 | "#@markdown | GPU | GPU RAM | FP32 teraFLOPS | Availability |\n", 35 | "#@markdown |:------:|:----------:|:--------------:|:------------------:|\n", 36 | "#@markdown | T4 | 16 GB | 8.1 | Free |\n", 37 | "#@markdown | P100 | 16 GB | 10.6 | Colab Pro |\n", 38 | "#@markdown | V100 | 16 GB | 15.7 | Colab Pro (Rare) |\n", 39 | "\n", 40 | "#@markdown ---\n", 41 | "#@markdown **Factory reset your Notebook's runtime if you want to get assigned a new GPU.**\n", 42 | "\n", 43 | "!nvidia-smi -L\n", 44 | "\n", 45 | "!nvidia-smi" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "#@markdown # **Install libraries** 🏗️\n", 52 | "#@markdown This cell will take a little while to download several libraries.\n", 53 | "\n", 54 | "#@markdown ---\n", 55 | "! pip install faster-whisper==0.10.0\n", 56 | "! pip install yt-dlp==2023.11.16\n", 57 | "! pip install openai==0.28.1\n", 58 | "\n", 59 | "# Windows Libs:https://github.com/Purfview/whisper-standalone-win/releases/download/libs/cuBLAS.and.cuDNN_CUDA11_win_v2.7z\n", 60 | "! 
apt install -y p7zip-full p7zip-rar\n", 61 | "! wget https://github.com/Purfview/whisper-standalone-win/releases/download/libs/cuBLAS.and.cuDNN_CUDA11_linux_v2.7z\n", 62 | "! 7z x cuBLAS.and.cuDNN_CUDA11_linux_v2.7z -o/usr/lib\n", 63 | "\n" 64 | ], 65 | "metadata": { 66 | "cellView": "form", 67 | "id": "DDX38HH5xLot" 68 | }, 69 | "execution_count": null, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "cellView": "form", 77 | "id": "IfG0E_WbRFI0" 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "#@markdown # **Import libraries for Python** 🐍\n", 82 | "\n", 83 | "#@markdown This cell will import all libraries for python code.\n", 84 | "import sys\n", 85 | "import warnings\n", 86 | "from faster_whisper import WhisperModel\n", 87 | "from pathlib import Path\n", 88 | "import yt_dlp\n", 89 | "import subprocess\n", 90 | "import torch\n", 91 | "import shutil\n", 92 | "import numpy as np\n", 93 | "from IPython.display import display, Markdown, YouTubeVideo\n", 94 | "\n", 95 | "device = torch.device('cuda:0')\n", 96 | "print('Using device:', device, file=sys.stderr)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "cellView": "form", 104 | "id": "1zwGAsr4sIgd" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "#@markdown # **Optional:** Save data in Google Drive 💾\n", 109 | "#@markdown Enter a Google Drive path and run this cell if you want to store the results inside Google Drive.\n", 110 | "\n", 111 | "# Uncomment to copy generated images to drive, faster than downloading directly from colab in my experience.\n", 112 | "from google.colab import drive\n", 113 | "drive_mount_path = Path(\"/\") / \"content\" / \"drive\"\n", 114 | "drive.mount(str(drive_mount_path))\n", 115 | "drive_mount_path /= \"My Drive\"\n", 116 | "#@markdown ---\n", 117 | "drive_path = \"Colab Notebooks/Videos Transcription and Translation\" #@param {type:\"string\"}\n", 118 | 
"#@markdown ---\n", 119 | "#@markdown **Run this cell again if you change your Google Drive path.**\n", 120 | "\n", 121 | "drive_whisper_path = drive_mount_path / Path(drive_path.lstrip(\"/\"))\n", 122 | "drive_whisper_path.mkdir(parents=True, exist_ok=True)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "cellView": "form", 130 | "id": "TMhrSq_GZ6kA" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "#@markdown # **Model selection** 🧠\n", 135 | "\n", 136 | "#@markdown As of the first public release, there are 4 pre-trained options to play with:\n", 137 | "\n", 138 | "#@markdown | Size | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |\n", 139 | "#@markdown |:------:|:----------:|:------------------:|:------------------:|:-------------:|:--------------:|\n", 140 | "#@markdown | tiny | 39 M | `tiny.en` | `tiny` | ~0.8 GB | ~32x |\n", 141 | "#@markdown | base | 74 M | `base.en` | `base` | ~1.0 GB | ~16x |\n", 142 | "#@markdown | small | 244 M | `small.en` | `small` | ~1.4 GB | ~6x |\n", 143 | "#@markdown | medium | 769 M | `medium.en` | `medium` | ~2.7 GB | ~2x |\n", 144 | "#@markdown | large-v1 | 1550 M | N/A | `large-v1` | ~4.3 GB | 1x |\n", 145 | "#@markdown | large-v2 | 1550 M | N/A | `large-v2` | ~4.3 GB | 1x |\n", 146 | "#@markdown | large-v3 | 1550 M | N/A | `large-v3` | ~3.6 GB | 1x |\n", 147 | "\n", 148 | "#@markdown ---\n", 149 | "model_size = 'large-v2' #@param ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1', 'large-v2', 'large-v3']\n", 150 | "device_type = \"cuda\" #@param {type:\"string\"} ['cuda', 'cpu']\n", 151 | "compute_type = \"float16\" #@param {type:\"string\"} ['float16', 'int8_float16', 'int8']\n", 152 | "#@markdown ---\n", 153 | "#@markdown **Run this cell again if you change the model.**\n", 154 | "\n", 155 | "model = WhisperModel(model_size, device=device_type, compute_type=compute_type)\n" 156 
| ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "cellView": "form", 163 | "id": "xYLPZQX9S7tU" 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "#@markdown # **Video selection** 📺\n", 168 | "\n", 169 | "#@markdown Enter the URL of the video you want to transcribe, whether you want to save the audio file in your Google Drive, and run the cell.\n", 170 | "\n", 171 | "Type = \"Video or playlist URL\" #@param ['Video or playlist URL', 'Google Drive']\n", 172 | "#@markdown ---\n", 173 | "#@markdown #### **Video or playlist URL**\n", 174 | "URL = \"https://dft3h5i221ap1.cloudfront.net/OpenAI/c2/video/sc-openai-c2-L5-vid6_2.mp4\" #@param {type:\"string\"}\n", 175 | "# store_audio = True #@param {type:\"boolean\"}\n", 176 | "#@markdown ---\n", 177 | "#@markdown #### **Google Drive video, audio (mp4, wav), or folder containing video and/or audio files**\n", 178 | "video_path = \"Colab Notebooks/transcription/my_video.mp4\" #@param {type:\"string\"}\n", 179 | "#@markdown ---\n", 180 | "#@markdown **Run this cell again if you change the video.**\n", 181 | "\n", 182 | "video_path_local_list = []\n", 183 | "\n", 184 | "if Type == \"Video or playlist URL\":\n", 185 | "\n", 186 | " ydl_opts = {\n", 187 | " 'format': 'm4a/bestaudio/best',\n", 188 | " 'outtmpl': '%(id)s.%(ext)s',\n", 189 | " # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments\n", 190 | " 'postprocessors': [{ # Extract audio using ffmpeg\n", 191 | " 'key': 'FFmpegExtractAudio',\n", 192 | " 'preferredcodec': 'wav',\n", 193 | " }]\n", 194 | " }\n", 195 | "\n", 196 | " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n", 197 | " error_code = ydl.download([URL])\n", 198 | " list_video_info = [ydl.extract_info(URL, download=False)]\n", 199 | "\n", 200 | " for video_info in list_video_info:\n", 201 | " video_path_local_list.append(Path(f\"{video_info['id']}.wav\"))\n", 202 | "\n", 203 | "elif Type == \"Google Drive\":\n", 204 | 
" # video_path_drive = drive_mount_path / Path(video_path.lstrip(\"/\"))\n", 205 | " video_path = drive_mount_path / Path(video_path.lstrip(\"/\"))\n", 206 | " if video_path.is_dir():\n", 207 | " for video_path_drive in video_path.glob(\"**/*\"):\n", 208 | " if video_path_drive.is_file():\n", 209 | " display(Markdown(f\"**{str(video_path_drive)} selected for transcription.**\"))\n", 210 | " elif video_path_drive.is_dir():\n", 211 | " display(Markdown(f\"**Subfolders not supported.**\"))\n", 212 | " else:\n", 213 | " display(Markdown(f\"**{str(video_path_drive)} does not exist, skipping.**\"))\n", 214 | " video_path_local = Path(\".\").resolve() / (video_path_drive.name)\n", 215 | " shutil.copy(video_path_drive, video_path_local)\n", 216 | " video_path_local_list.append(video_path_local)\n", 217 | " elif video_path.is_file():\n", 218 | " video_path_local = Path(\".\").resolve() / (video_path.name)\n", 219 | " shutil.copy(video_path, video_path_local)\n", 220 | " video_path_local_list.append(video_path_local)\n", 221 | " display(Markdown(f\"**{str(video_path)} selected for transcription.**\"))\n", 222 | " else:\n", 223 | " display(Markdown(f\"**{str(video_path)} does not exist.**\"))\n", 224 | "\n", 225 | "else:\n", 226 | " raise(TypeError(\"Please select supported input type.\"))\n", 227 | "\n", 228 | "for video_path_local in video_path_local_list:\n", 229 | " if video_path_local.suffix == \".mp4\":\n", 230 | " video_path_local = video_path_local.with_suffix(\".wav\")\n", 231 | " result = subprocess.run([\"ffmpeg\", \"-i\", str(video_path_local.with_suffix(\".mp4\")), \"-vn\", \"-acodec\", \"pcm_s16le\", \"-ar\", \"16000\", \"-ac\", \"1\", str(video_path_local)])\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "cellView": "form", 239 | "collapsed": true, 240 | "id": "Ad6n1m4deAHp", 241 | "jupyter": { 242 | "outputs_hidden": true 243 | } 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "#@markdown # 
**Run the model** 🚀\n", 248 | "\n", 249 | "#@markdown Run this cell to execute the transcription of the video. This can take a while and vary based on the length of the video and the number of parameters of the model selected above.\n", 250 | "def seconds_to_time_format(s):\n", 251 | " # Convert seconds to hours, minutes, seconds, and milliseconds\n", 252 | " hours = s // 3600\n", 253 | " s %= 3600\n", 254 | " minutes = s // 60\n", 255 | " s %= 60\n", 256 | " seconds = s // 1\n", 257 | " milliseconds = round((s % 1) * 1000)\n", 258 | "\n", 259 | " # Return the formatted string\n", 260 | " return f\"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}\"\n", 261 | "\n", 262 | "\n", 263 | "#@markdown ## **Parameters** ⚙️\n", 264 | "\n", 265 | "#@markdown ### **Behavior control**\n", 266 | "#@markdown #### Language\n", 267 | "language_options = {\n", 268 | " \"Auto Detect\": \"auto\",\n", 269 | " \"English\": \"en\",\n", 270 | " \"中文(Chinese)\": \"zh\",\n", 271 | " \"日本語(Japanese)\": \"ja\",\n", 272 | " \"Deutsch(German)\": \"de\",\n", 273 | " \"Français(French)\": \"fr\"\n", 274 | "}\n", 275 | "\n", 276 | "language_option = \"Auto Detect\" #@param [\"Auto Detect\", \"English\", \"中文(Chinese)\", \"日本語(Japanese)\", \"Deutsch(German)\", \"Français(French)\"] {allow-input: true}\n", 277 | "language = language_options.get(language_option, language_option)\n", 278 | "\n", 279 | "#@markdown #### initial prompt\n", 280 | "initial_prompt = \"Hello, Let's begin to talk.\" #@param {type:\"string\"}\n", 281 | "#@markdown ---\n", 282 | "#@markdown #### Word-level timestamps\n", 283 | "word_level_timestamps = True #@param {type:\"boolean\"}\n", 284 | "#@markdown ---\n", 285 | "#@markdown #### VAD filter\n", 286 | "vad_filter = False #@param {type:\"boolean\"}\n", 287 | "vad_filter_min_silence_duration_ms = 50 #@param {type:\"integer\"}\n", 288 | "#@markdown ---\n", 289 | "\n", 290 | "\n", 291 | "segments, info = model.transcribe(str(video_path_local), 
beam_size=5,\n", 292 | " language=None if language == \"auto\" else language,\n", 293 | " initial_prompt=initial_prompt,\n", 294 | " word_timestamps=word_level_timestamps,\n", 295 | " vad_filter=vad_filter,\n", 296 | " vad_parameters=dict(min_silence_duration_ms=vad_filter_min_silence_duration_ms))\n", 297 | "\n", 298 | "language_detected = info.language\n", 299 | "display(Markdown(f\"Detected language '{info.language}' with probability {info.language_probability}\"))\n", 300 | "\n", 301 | "fragments = []\n", 302 | "\n", 303 | "for segment in segments:\n", 304 | " print(f\"[{seconds_to_time_format(segment.start)} --> {seconds_to_time_format(segment.end)}] {segment.text}\")\n", 305 | " if word_level_timestamps:\n", 306 | " for word in segment.words:\n", 307 | " ts_start = seconds_to_time_format(word.start)\n", 308 | " ts_end = seconds_to_time_format(word.end)\n", 309 | " #print(f\"[{ts_start} --> {ts_end}] {word.word}\")\n", 310 | " fragments.append(dict(start=word.start,end=word.end,text=word.word))\n", 311 | " else:\n", 312 | " ts_start = seconds_to_time_format(segment.start)\n", 313 | " ts_end = seconds_to_time_format(segment.end)\n", 314 | " #print(f\"[{ts_start} --> {ts_end}] {segment.text}\")\n", 315 | " fragments.append(dict(start=segment.start,end=segment.end,text=segment.text))\n" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "cellView": "form", 323 | "id": "v9214wd0Nk5J" 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "#@title Merge words/segments to sentences\n", 328 | "\n", 329 | "#@markdown Run this cell to merge words/segments to sentences.\n", 330 | "#@markdown ## **Parameters** ⚙️\n", 331 | "\n", 332 | "#@markdown ### **Behavior control**\n", 333 | "#@markdown #### Milliseconds gap between_two sentences\n", 334 | "max_gap_ms_between_two_sentence = 200 #@param {type:\"integer\"}\n", 335 | "\n", 336 | "import json\n", 337 | "\n", 338 | "# Merge words/segments to sentences\n", 339 | 
#@title Merge words/segments to sentences

#@markdown Run this cell to merge words/segments to sentences.
#@markdown ## **Parameters** ⚙️

#@markdown ### **Behavior control**
#@markdown #### Milliseconds gap between two sentences
max_gap_ms_between_two_sentence = 200 #@param {type:"integer"}

import json

# Merge words/segments to sentences
def merge_fragments(fragments, gap_ms):
    """Merge word/segment fragments into sentence-level fragments.

    A sentence is closed when the silence before the next fragment exceeds
    ``gap_ms`` or when a fragment ends with sentence-final punctuation.

    Args:
        fragments: list of dicts with 'start', 'end' (seconds) and 'text'.
        gap_ms: maximum allowed gap inside one sentence.
            NOTE(review): despite the name, the caller passes this value in
            *seconds* (``max_gap_ms_between_two_sentence / 1000.0``), which is
            consistent with the 'start'/'end' units — confirm before renaming.

    Returns:
        New list of merged fragment dicts (same key layout as the input).
    """
    new_fragments = []
    new_fragment = {}
    # Space-join for Latin-script languages, direct concatenation otherwise.
    # NOTE(review): reads the global `language_detected` set by the
    # transcription cell, so this cell must run after transcription.
    delimiter = ' ' if language_detected in ['en', 'de', 'fr'] else ''
    last_index = len(fragments) - 1
    for i, fragment in enumerate(fragments):
        start = fragment['start']
        end = fragment['end']
        text = fragment['text']

        if new_fragment.get('start') is None:
            new_fragment['start'] = start
        if new_fragment.get('end') is None:
            new_fragment['end'] = end
        if new_fragment.get('text') is None:
            new_fragment['text'] = ""

        if start - new_fragment['end'] > gap_ms:
            # Gap too large: close the current sentence and start a new one.
            new_fragments.append(new_fragment)
            new_fragment = dict(start=start, end=end, text=text)
            continue

        new_fragment['end'] = end
        new_fragment['text'] = f"{new_fragment['text']}{delimiter}{text.lstrip()}"

        # End of a sentence when symbols found: [.?]
        if (len(text) > 0 and text[-1] in ['.', '?', '。', '?', '!', '!']) or i == last_index:
            new_fragments.append(new_fragment)
            new_fragment = {}

    # BUGFIX: a gap split on the very last fragment previously left that
    # fragment pending and dropped it; flush any still-open sentence.
    if new_fragment.get('start') is not None:
        new_fragments.append(new_fragment)
    return new_fragments


new_fragments = merge_fragments(fragments, max_gap_ms_between_two_sentence / 1000.0)

# Save as json file
json_ext_name = ".json"
json_transcript_file_name = video_path_local.stem + json_ext_name
with open(json_transcript_file_name, 'w') as f:
    f.write(json.dumps(new_fragments))
# BUGFIX: message previously said "SRT file" for the JSON transcript.
display(Markdown(f"**Transcript JSON file created: {video_path_local.parent / json_transcript_file_name}**"))
# Save the merged sentences as an SRT subtitle file.
srt_ext_name = ".srt"
srt_transcript_file_name = video_path_local.stem + srt_ext_name
with open(srt_transcript_file_name, 'w') as f:
    for sentence_idx, fragment in enumerate(new_fragments):
        ts_start = seconds_to_time_format(fragment['start'])
        ts_end = seconds_to_time_format(fragment['end'])
        text = fragment['text']
        print(f"[{ts_start} --> {ts_end}] {text}")
        # SRT entry: index, time range, text, blank separator line.
        f.write(f"{sentence_idx + 1}\n")
        f.write(f"{ts_start} --> {ts_end}\n")
        f.write(f"{text.strip()}\n\n")

# Best effort: copy the SRT next to the video on Google Drive; fall back to
# reporting only the local copy when Drive is not mounted.
try:
    shutil.copy(video_path_local.parent / srt_transcript_file_name,
                drive_whisper_path / srt_transcript_file_name)
    display(Markdown(f"**Transcript SRT file created: {drive_whisper_path / srt_transcript_file_name}**"))
# BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
except Exception:
    display(Markdown(f"**Transcript SRT file created: {video_path_local.parent / srt_transcript_file_name}**"))
#@markdown # **Translate**
#@markdown Run this cell to translate subtitles to the language you want.
#@markdown ## **Parameters** ⚙️

#@markdown ### **Behavior control**

#@markdown #### API Type
api_type = "openai" #@param ["azure", "openai"]

#@markdown #### Azure API Config(If you are using `openai`, please leave these fields blank.)
api_base = "https://xxxxxx.openai.azure.com" #@param {type:"string"}
api_version = "2023-05-15" #@param {type:"string"}
deployment_id = "gpt3" #@param {type:"string"}

#@markdown #### API Key and Model Config
api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" #@param {type:"string"}
model_name = "gpt-3.5-turbo-1106" #@param ["gpt-3.5-turbo","gpt-3.5-turbo-1106","gpt-4","gpt-4-1106-preview"] {allow-input: true}
temperature = 0 #@param {type:"number"}
#@markdown ---
#@markdown #### Target Language
target_language = "简体中文" # @param ["简体中文", "繁體中文", "日本語", "English", "German", "French"] {allow-input: true}
#@markdown ---
#@markdown #### Retry and Token Chunks
translate_max_retry_times = 10 #@param {type:"integer"}
count_of_sentence_send_once_limit = 5 #@param {type:"integer"}

# This prompt is from https://twitter.com/dotey/status/1665476562219573249
system_prompt = f"""You are a program responsible for translating subtitles. Your task is to translate the subtitles into {target_language}, maintaining a colloquial tone and style, avoiding long sentences, and ignoring verbal tics such as 'so', 'you know', etc.
The input will be a JSON-formatted string array, which should be translated in accordance with the following steps:
Step1: Join the string array to a sentence, then translate it to {target_language};
Step2: Split the translated sentence to a string array, each item of which should correspond to an item in the original input array.
Step3: Verify if the count of items in the output array equals that of the input array and no item is blank. If it doesn't, go back to Step 2 and try again.

Respond with a JSON-formatted string array:
"""
import openai
import json

openai.api_key = api_key

if api_type == "azure":
    openai.api_type = "azure"
    openai.api_base = api_base
    openai.api_version = api_version
else:
    # The plain OpenAI API has no deployment concept.
    deployment_id = None


def translate_by_chatgpt(sentences, max_retry_times=10, deployment_id=None, model_name="gpt-3.5-turbo", temperature=0.7):
    """Translate a batch of sentences via the ChatCompletion API.

    Retries (up to ``max_retry_times``) when the API call raises or when the
    model returns a different number of items than it was given.

    Args:
        sentences: list of strings to translate as one batch.
        max_retry_times: maximum number of retries before giving up.
        deployment_id: Azure deployment id, or None for plain OpenAI.
        model_name: chat model to use.
        temperature: sampling temperature passed to the API.

    Returns:
        List of translated strings; may have a different length than
        ``sentences`` (or be empty) when all retries are exhausted.
    """
    system_msg = dict(role="system", content=system_prompt)
    user_msg_content = json.dumps(sentences)
    user_msg = dict(role="user", content=user_msg_content)
    current_retry_times = 0
    sentences_translated = []

    while True:
        try:
            chat_completion = openai.ChatCompletion.create(deployment_id=deployment_id,
                                                           model=model_name,
                                                           messages=[system_msg, user_msg],
                                                           temperature=temperature)
            sentences_translated = json.loads(chat_completion.choices[0].message.content)

            # The model sometimes merges/splits items; retry until counts match.
            if len(sentences_translated) != len(sentences) and current_retry_times < max_retry_times:
                current_retry_times = current_retry_times + 1
                # BUGFIX: log message typo "Tranlate" corrected.
                print(f"==Translate Retry with {current_retry_times} times, Reason: translated={len(sentences_translated)}, origin={len(sentences)}")
                continue

            break
        # BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt.
        except Exception:
            if current_retry_times >= max_retry_times:
                break
            current_retry_times = current_retry_times + 1
            print(f"==Translate Retry with {current_retry_times} times")
            continue
    return sentences_translated


def translate_fragments(fragments, sentence_send_limit=5):
    """Translate fragments chunk by chunk, attaching 'text_translated' in place.

    Args:
        fragments: sentence-level fragment dicts ('start', 'end', 'text').
        sentence_send_limit: how many sentences to send per API call.

    Returns:
        The same fragment dicts, in order, possibly enriched with a
        'text_translated' key (missing when translation failed for a chunk).
    """
    fragments_translated = []

    # Todo: The count of tokens in sentences must be less than Max Tokens API allowed
    length = len(fragments)
    for n in range(0, length, sentence_send_limit):
        fragments_will_be_translated = fragments[n:n + sentence_send_limit]
        sentences_translated = translate_by_chatgpt(list(map(lambda x: x['text'], fragments_will_be_translated)),
                                                    translate_max_retry_times,
                                                    deployment_id,
                                                    model_name,
                                                    # BUGFIX: the user-selected temperature was not
                                                    # forwarded and silently defaulted to 0.7.
                                                    temperature)

        # BUGFIX: slice guards against the model returning MORE items than
        # sent, which previously raised IndexError on the fragment lookup.
        for i, sentence_translated in enumerate(sentences_translated[:len(fragments_will_be_translated)]):
            print(f"{seconds_to_time_format(fragments_will_be_translated[i]['start'])} --> {seconds_to_time_format(fragments_will_be_translated[i]['end'])}")
            print("Original : " + fragments_will_be_translated[i]['text'].lstrip())
            print("Translated: " + sentence_translated)
            print('\n')
            fragments_will_be_translated[i]['text_translated'] = sentence_translated

        fragments_translated.extend(fragments_will_be_translated)

    return fragments_translated


fragments_translated = translate_fragments(new_fragments, count_of_sentence_send_once_limit)

# Save translation as json file
json_translated_file_name = f"{video_path_local.stem}-translated.json"
with open(json_translated_file_name, 'w') as f:
    # BUGFIX: previously dumped `new_fragments`; write the translated list
    # this section's message and filename promise.
    f.write(json.dumps(fragments_translated))
display(Markdown(f"**Translation JSON file created: {video_path_local.parent / json_translated_file_name}**"))
# Save translation as an SRT file: translated line first, original text below
# it, so both languages show in players that render multi-line cues.
srt_translated_file_name = f"{video_path_local.stem}-translated.srt"
with open(srt_translated_file_name, 'w') as f:
    for sentence_idx, fragment in enumerate(fragments_translated):
        ts_start = seconds_to_time_format(fragment['start'])
        ts_end = seconds_to_time_format(fragment['end'])
        # .get() defaults keep the writer robust when a chunk failed to translate.
        text = fragment.get('text', '')
        text_translated = fragment.get('text_translated', '')
        f.write(f"{sentence_idx + 1}\n")
        f.write(f"{ts_start} --> {ts_end}\n")
        f.write(f"{text_translated.strip()}\n")
        f.write(f"{text.strip()}\n\n")

# Best effort: copy to Google Drive; fall back to reporting the local file.
try:
    shutil.copy(video_path_local.parent / srt_translated_file_name,
                drive_whisper_path / srt_translated_file_name)
    display(Markdown(f"**Translated SRT file created: {drive_whisper_path / srt_translated_file_name}**"))
# BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
except Exception:
    display(Markdown(f"**Translated SRT file created: {video_path_local.parent / srt_translated_file_name}**"))