├── .gitignore ├── LICENSE ├── README.md └── yt2srt.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 
2017 Vitor Freitas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YouTube Transcript --> .srt 2 | 3 | Convert YouTube transcriptions to `.srt` files 4 | 5 | 1. Copy the transcripts from YouTube and save in a `.txt` file in the same directory as the `yt2srt.py` script. 6 | 7 | Sample input file: 8 | 9 | **transcript.txt** 10 | 11 | ``` 12 | 00:00 13 | "Who Shouldn't Consume Curcumin or Turmeric" 14 | 00:07 15 | Following flax and wheatgrass, 16 | 00:09 17 | turmeric is the third best-selling 18 | 00:11 19 | botanical dietary supplement, 20 | ``` 21 | 22 | 2. Execute the `yt2srt.py` script. It will collect all `.txt` files in the same directory and try to convert to `.srt`: 23 | 24 | ``` 25 | python3 yt2srt.py 26 | ``` 27 | 28 | 3. 
"""Convert copied YouTube transcripts (``.txt``) into SubRip (``.srt``) files.

The expected input alternates between a timestamp line (``MM:SS`` or
``H:MM:SS``) and the caption text that starts at that timestamp.
"""

import glob
import os
import sys


def _parse_timestamp(text):
    """Return the number of seconds described by a transcript timestamp.

    Accepts two or three colon-separated decimal fields (``M:SS``, ``MM:SS``,
    ``H:MM:SS``).

    Raises:
        ValueError: if ``text`` does not have that shape.
    """
    parts = text.split(':')
    if len(parts) not in (2, 3) or not all(part.isdecimal() for part in parts):
        raise ValueError('not a transcript timestamp: {0!r}'.format(text))
    total_seconds = 0
    for part in parts:
        total_seconds = total_seconds * 60 + int(part)
    return total_seconds


def _format_srt_time(total_seconds):
    """Format a whole number of seconds as an SRT ``HH:MM:SS,000`` timestamp."""
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '{0:02d}:{1:02d}:{2:02d},000'.format(hours, minutes, seconds)


def convert_youtube_subtitle_to_srt(youtube_subtitles_filename,
                                    last_entry_seconds=1):
    """Write an ``.srt`` file next to a copied YouTube transcript.

    The output path is the input path with its final extension replaced by
    ``.srt``. Each caption ends where the next one starts.

    Args:
        youtube_subtitles_filename: path of the transcript text file.
        last_entry_seconds: whole seconds the final caption stays on screen
            when the transcript does not end with an extra timestamp line.

    Raises:
        ValueError: if a line that should hold a timestamp cannot be parsed
            (this includes ``UnicodeDecodeError`` for non-UTF-8 input). The
            output file is not created in that case.
    """
    # splitext only strips the last extension, so dots elsewhere in the
    # path ("./dir/my.talk.txt") no longer truncate the output name.
    base_filename = os.path.splitext(youtube_subtitles_filename)[0]
    srt_out_filename = base_filename + '.srt'

    # utf-8-sig reads plain UTF-8 and also swallows a leading BOM, which
    # would otherwise be glued to the first timestamp.
    with open(youtube_subtitles_filename, 'r', encoding='utf-8-sig') as infile:
        normalized = [' '.join(line.split()) for line in infile]

    # Blank lines carry no information and would break the strict
    # timestamp/text alternation below.
    lines = [line for line in normalized if line]

    # Parse everything before touching the output so a malformed input
    # never leaves a half-written .srt behind.
    starts = [_parse_timestamp(stamp) for stamp in lines[0::2]]
    captions = lines[1::2]

    with open(srt_out_filename, 'w', encoding='utf-8') as outfile:
        for index, caption in enumerate(captions):
            start = starts[index]
            if index + 1 < len(starts):
                # Either the next caption's start or a trailing timestamp
                # that has no caption of its own.
                end = starts[index + 1]
            else:
                end = start + last_entry_seconds
            outfile.write('{0}\n'.format(index + 1))
            outfile.write('{0} --> {1}\n'.format(_format_srt_time(start),
                                                 _format_srt_time(end)))
            outfile.write('{0}\n'.format(caption))
            outfile.write('\n')


def main():
    """Convert every ``.txt`` file in the current directory.

    Files that do not look like a transcript are reported on stderr and
    skipped instead of aborting the whole run.
    """
    for filename in sorted(glob.glob('*.txt')):
        try:
            convert_youtube_subtitle_to_srt(filename)
        except ValueError as error:
            print('Skipping {0}: {1}'.format(filename, error), file=sys.stderr)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------