├── .dockerignore ├── .editorconfig ├── .github ├── CODEOWNERS └── workflows │ ├── ci-main.yml │ └── ci.yml ├── .gitignore ├── .style.yapf ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE ├── Makefile ├── Makefile.venv ├── README.md ├── app.py ├── entrypoint.sh ├── images ├── notion_screenshot.png └── youtube_screenshot.png ├── requirements.txt ├── tests └── youtube2notion │ ├── test_markdown.py │ ├── test_youtube2notion.py │ ├── test_youtube_info.py │ ├── test_youtube_subtitle.py │ └── test_youtube_video.py ├── youtube2notion.py └── youtube2notion ├── __init__.py ├── ffmpeg.py ├── markdown.py ├── youtube2notion.py ├── youtube_info.py ├── youtube_subtitle.py └── youtube_video.py /.dockerignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | .venv/ 3 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [Makefile] 12 | indent_style = tab 13 | indent_size = 4 14 | 15 | [*.{yml,yaml}] 16 | indent_style = space 17 | indent_size = 2 18 | 19 | [*.{md,markdown}] 20 | indent_style = space 21 | indent_size = 2 22 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @xissy @gnokoheat 2 | -------------------------------------------------------------------------------- /.github/workflows/ci-main.yml: -------------------------------------------------------------------------------- 1 | name: ci-main 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | 8 | jobs: 9 | build: 10 | name: Build and Push 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - name: Checkout 14 | uses: 
actions/checkout@v2 15 | with: 16 | fetch-depth: 5 17 | 18 | - name: Set up QEMU 19 | uses: docker/setup-qemu-action@v1 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v1 23 | 24 | - name: Login to DockerHub 25 | uses: docker/login-action@v1 26 | with: 27 | username: ${{ secrets.DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 29 | 30 | - name: Build and push 31 | uses: docker/build-push-action@v2 32 | id: docker_build 33 | with: 34 | context: . 35 | platforms: linux/amd64,linux/arm64 36 | push: true 37 | tags: | 38 | taehoio/youtube2notion:latest 39 | taehoio/youtube2notion:${{ github.sha }} 40 | 41 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | 8 | jobs: 9 | build: 10 | name: Build and Push 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 5 17 | 18 | - name: Set up QEMU 19 | uses: docker/setup-qemu-action@v1 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v1 23 | 24 | - name: Set up Google Cloud SDK 25 | uses: google-github-actions/setup-gcloud@master 26 | with: 27 | project_id: taehoio-global 28 | service_account_key: ${{ secrets.GCP_SA_KEY }} 29 | export_default_credentials: true 30 | 31 | - name: Configure gcloud docker 32 | run: | 33 | gcloud auth configure-docker asia-northeast1-docker.pkg.dev 34 | 35 | - name: Build and push 36 | uses: docker/build-push-action@v2 37 | id: docker_build 38 | with: 39 | context: . 
40 | platforms: linux/amd64,linux/arm64 41 | push: true 42 | tags: | 43 | asia-northeast1-docker.pkg.dev/taehoio-global/docker-registry/youtube2notion:${{ github.sha }} 44 | 45 | lint: 46 | name: Lint 47 | runs-on: ubuntu-20.04 48 | steps: 49 | - name: Set up Python3 50 | uses: actions/setup-python@v2 51 | with: 52 | python-version: '3.x' 53 | architecture: 'x64' 54 | 55 | - name: Checkout 56 | uses: actions/checkout@v2 57 | with: 58 | fetch-depth: 5 59 | 60 | - name: Cache dependencies 61 | uses: actions/cache@v2 62 | with: 63 | path: ~/.cache/pip 64 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 65 | restore-keys: | 66 | ${{ runner.os }}-pip- 67 | 68 | - name: Lint 69 | run: | 70 | make lint 71 | 72 | unittest: 73 | name: Unit tests 74 | runs-on: ubuntu-20.04 75 | steps: 76 | - name: Set up Python3 77 | uses: actions/setup-python@v2 78 | with: 79 | python-version: '3.x' 80 | architecture: 'x64' 81 | 82 | - name: Set up ffmpeg 83 | uses: FedericoCarboni/setup-ffmpeg@v1 84 | 85 | - name: Checkout 86 | uses: actions/checkout@v2 87 | with: 88 | fetch-depth: 5 89 | 90 | - name: Cache dependencies 91 | uses: actions/cache@v2 92 | with: 93 | path: ~/.cache/pip 94 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 95 | restore-keys: | 96 | ${{ runner.os }}-pip- 97 | 98 | - name: Test 99 | run: | 100 | make test 101 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | tmp/ 3 | 4 | # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,vim,windows,linux,macos,jetbrains+all 5 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,vim,windows,linux,macos,jetbrains+all 6 | 7 | ### JetBrains+all ### 8 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 9 | # Reference: 
https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 10 | 11 | # User-specific stuff 12 | .idea/**/workspace.xml 13 | .idea/**/tasks.xml 14 | .idea/**/usage.statistics.xml 15 | .idea/**/dictionaries 16 | .idea/**/shelf 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 38 | # .idea/artifacts 39 | # .idea/compiler.xml 40 | # .idea/jarRepositories.xml 41 | # .idea/modules.xml 42 | # .idea/*.iml 43 | # .idea/modules 44 | # *.iml 45 | # *.ipr 46 | 47 | # CMake 48 | cmake-build-*/ 49 | 50 | # Mongo Explorer plugin 51 | .idea/**/mongoSettings.xml 52 | 53 | # File-based project format 54 | *.iws 55 | 56 | # IntelliJ 57 | out/ 58 | 59 | # mpeltonen/sbt-idea plugin 60 | .idea_modules/ 61 | 62 | # JIRA plugin 63 | atlassian-ide-plugin.xml 64 | 65 | # Cursive Clojure plugin 66 | .idea/replstate.xml 67 | 68 | # Crashlytics plugin (for Android Studio and IntelliJ) 69 | com_crashlytics_export_strings.xml 70 | crashlytics.properties 71 | crashlytics-build.properties 72 | fabric.properties 73 | 74 | # Editor-based Rest Client 75 | .idea/httpRequests 76 | 77 | # Android studio 3.1+ serialized cache file 78 | .idea/caches/build_file_checksums.ser 79 | 80 | ### JetBrains+all Patch ### 81 | # Ignores the whole .idea folder and all .iml files 82 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 83 | 84 | .idea/ 85 | 86 | # Reason: 
https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 87 | 88 | *.iml 89 | modules.xml 90 | .idea/misc.xml 91 | *.ipr 92 | 93 | # Sonarlint plugin 94 | .idea/sonarlint 95 | 96 | ### Linux ### 97 | *~ 98 | 99 | # temporary files which can be created if a process still has a handle open of a deleted file 100 | .fuse_hidden* 101 | 102 | # KDE directory preferences 103 | .directory 104 | 105 | # Linux trash folder which might appear on any partition or disk 106 | .Trash-* 107 | 108 | # .nfs files are created when an open file is removed but is still being accessed 109 | .nfs* 110 | 111 | ### macOS ### 112 | # General 113 | .DS_Store 114 | .AppleDouble 115 | .LSOverride 116 | 117 | # Icon must end with two \r 118 | Icon 119 | 120 | 121 | # Thumbnails 122 | ._* 123 | 124 | # Files that might appear in the root of a volume 125 | .DocumentRevisions-V100 126 | .fseventsd 127 | .Spotlight-V100 128 | .TemporaryItems 129 | .Trashes 130 | .VolumeIcon.icns 131 | .com.apple.timemachine.donotpresent 132 | 133 | # Directories potentially created on remote AFP share 134 | .AppleDB 135 | .AppleDesktop 136 | Network Trash Folder 137 | Temporary Items 138 | .apdisk 139 | 140 | ### Python ### 141 | # Byte-compiled / optimized / DLL files 142 | __pycache__/ 143 | *.py[cod] 144 | *$py.class 145 | 146 | # C extensions 147 | *.so 148 | 149 | # Distribution / packaging 150 | .Python 151 | build/ 152 | develop-eggs/ 153 | dist/ 154 | downloads/ 155 | eggs/ 156 | .eggs/ 157 | parts/ 158 | sdist/ 159 | var/ 160 | wheels/ 161 | pip-wheel-metadata/ 162 | share/python-wheels/ 163 | *.egg-info/ 164 | .installed.cfg 165 | *.egg 166 | MANIFEST 167 | 168 | # PyInstaller 169 | # Usually these files are written by a python script from a template 170 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
171 | *.manifest 172 | *.spec 173 | 174 | # Installer logs 175 | pip-log.txt 176 | pip-delete-this-directory.txt 177 | 178 | # Unit test / coverage reports 179 | htmlcov/ 180 | .tox/ 181 | .nox/ 182 | .coverage 183 | .coverage.* 184 | .cache 185 | nosetests.xml 186 | coverage.xml 187 | *.cover 188 | *.py,cover 189 | .hypothesis/ 190 | .pytest_cache/ 191 | pytestdebug.log 192 | 193 | # Translations 194 | *.mo 195 | *.pot 196 | 197 | # Django stuff: 198 | *.log 199 | local_settings.py 200 | db.sqlite3 201 | db.sqlite3-journal 202 | 203 | # Flask stuff: 204 | instance/ 205 | .webassets-cache 206 | 207 | # Scrapy stuff: 208 | .scrapy 209 | 210 | # Sphinx documentation 211 | docs/_build/ 212 | doc/_build/ 213 | 214 | # PyBuilder 215 | target/ 216 | 217 | # Jupyter Notebook 218 | .ipynb_checkpoints 219 | 220 | # IPython 221 | profile_default/ 222 | ipython_config.py 223 | 224 | # pyenv 225 | .python-version 226 | 227 | # pipenv 228 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 229 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 230 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 231 | # install all needed dependencies. 232 | #Pipfile.lock 233 | 234 | # poetry 235 | #poetry.lock 236 | 237 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 238 | __pypackages__/ 239 | 240 | # Celery stuff 241 | celerybeat-schedule 242 | celerybeat.pid 243 | 244 | # SageMath parsed files 245 | *.sage.py 246 | 247 | # Environments 248 | # .env 249 | .env/ 250 | .venv/ 251 | env/ 252 | venv/ 253 | ENV/ 254 | env.bak/ 255 | venv.bak/ 256 | pythonenv* 257 | 258 | # Spyder project settings 259 | .spyderproject 260 | .spyproject 261 | 262 | # Rope project settings 263 | .ropeproject 264 | 265 | # mkdocs documentation 266 | /site 267 | 268 | # mypy 269 | .mypy_cache/ 270 | .dmypy.json 271 | dmypy.json 272 | 273 | # Pyre type checker 274 | .pyre/ 275 | 276 | # pytype static type analyzer 277 | .pytype/ 278 | 279 | # operating system-related files 280 | # file properties cache/storage on macOS 281 | *.DS_Store 282 | # thumbnail cache on Windows 283 | Thumbs.db 284 | 285 | # profiling data 286 | .prof 287 | 288 | 289 | ### Vim ### 290 | # Swap 291 | [._]*.s[a-v][a-z] 292 | !*.svg # comment out if you don't need vector files 293 | [._]*.sw[a-p] 294 | [._]s[a-rt-v][a-z] 295 | [._]ss[a-gi-z] 296 | [._]sw[a-p] 297 | 298 | # Session 299 | Session.vim 300 | Sessionx.vim 301 | 302 | # Temporary 303 | .netrwhist 304 | # Auto-generated tag files 305 | tags 306 | # Persistent undo 307 | [._]*.un~ 308 | 309 | ### VisualStudioCode ### 310 | .vscode/* 311 | !.vscode/settings.json 312 | !.vscode/tasks.json 313 | !.vscode/launch.json 314 | !.vscode/extensions.json 315 | *.code-workspace 316 | 317 | ### VisualStudioCode Patch ### 318 | # Ignore all local history of files 319 | .history 320 | .ionide 321 | 322 | ### Windows ### 323 | # Windows thumbnail cache files 324 | Thumbs.db:encryptable 325 | ehthumbs.db 326 | ehthumbs_vista.db 327 | 328 | # Dump file 329 | *.stackdump 330 | 331 | # Folder config file 332 | [Dd]esktop.ini 333 | 334 | # Recycle Bin used on file shares 335 | $RECYCLE.BIN/ 336 | 337 | # Windows Installer files 338 | *.cab 339 | *.msi 340 | *.msix 341 | *.msm 342 | *.msp 343 | 344 | # Windows 
shortcuts 345 | *.lnk 346 | 347 | # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,vim,windows,linux,macos,jetbrains+all 348 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = yapf 3 | spaces_before_comment = 4 4 | indent_width: 4 5 | split_before_logical_operator = true 6 | column_limit = 79 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "yapf", 3 | } 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.5-alpine3.13 AS builder 2 | 3 | RUN apk add --no-cache libc6-compat build-base git 4 | 5 | # Google CloudRun changes HOME to /home for CMD where RUN uses /root 6 | # https://stackoverflow.com/questions/62276734/google-cloud-run-changes-home-to-home-for-cmd-where-run-uses-root 7 | # So add a new user and copy /home directory explicitly to be compatible with Google CloudRun. 8 | RUN adduser -S youtube2notion 9 | USER youtube2notion 10 | 11 | COPY requirements.txt . 12 | RUN pip install --user -r requirements.txt 13 | 14 | 15 | FROM python:3.9.5-alpine3.13 AS runner 16 | 17 | RUN apk add --no-cache ffmpeg 18 | 19 | RUN adduser -S youtube2notion 20 | USER youtube2notion 21 | 22 | COPY --from=builder /home/youtube2notion/.local /home/youtube2notion/.local 23 | ENV PATH=/home/youtube2notion/.local/bin:$PATH 24 | 25 | COPY . . 
26 | 27 | ENTRYPOINT [ "./entrypoint.sh" ] 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 taehoio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include Makefile.venv 2 | 3 | .PHONY: test 4 | test: venv 5 | $(VENV)/python -m unittest -vvv tests/**/test_*.py 6 | 7 | .PHONY: coverage 8 | coverage: venv 9 | $(VENV)/coverage run --omit=".venv/*" -m unittest -vvv tests/**/test_*.py && \ 10 | $(VENV)/coverage report -m 11 | 12 | .PHONY: lint 13 | lint: venv 14 | $(VENV)/yapf --diff --recursive --verify youtube2notion tests 15 | $(VENV)/flake8 youtube2notion tests 16 | 17 | .PHONY: format 18 | format: venv 19 | $(VENV)/yapf --in-place --recursive --verify youtube2notion tests 20 | 21 | .PHONY: start-server-dev 22 | start-server-dev: venv 23 | $(VENV)/python app.py 24 | -------------------------------------------------------------------------------- /Makefile.venv: -------------------------------------------------------------------------------- 1 | # 2 | # SEAMLESSLY MANAGE PYTHON VIRTUAL ENVIRONMENT WITH A MAKEFILE 3 | # 4 | # https://github.com/sio/Makefile.venv v2020.08.14 5 | # 6 | # 7 | # Insert `include Makefile.venv` at the bottom of your Makefile to enable these 8 | # rules. 9 | # 10 | # When writing your Makefile use '$(VENV)/python' to refer to the Python 11 | # interpreter within virtual environment and '$(VENV)/executablename' for any 12 | # other executable in venv. 13 | # 14 | # This Makefile provides the following targets: 15 | # venv 16 | # Use this as a dependency for any target that requires virtual 17 | # environment to be created and configured 18 | # python, ipython 19 | # Use these to launch interactive Python shell within virtual environment 20 | # shell, bash, zsh 21 | # Launch interactive command line shell. "shell" target launches the 22 | # default shell Makefile executes its rules in (usually /bin/sh). 23 | # "bash" and "zsh" can be used to refer to the specific desired shell. 
24 | # show-venv 25 | # Show versions of Python and pip, and the path to the virtual environment 26 | # clean-venv 27 | # Remove virtual environment 28 | # $(VENV)/executable_name 29 | # Install `executable_name` with pip. Only packages with names matching 30 | # the name of the corresponding executable are supported. 31 | # Use this as a lightweight mechanism for development dependencies 32 | # tracking. E.g. for one-off tools that are not required in every 33 | # developer's environment, therefore are not included into 34 | # requirements.txt or setup.py. 35 | # Note: 36 | # Rules using such target or dependency MUST be defined below 37 | # `include` directive to make use of correct $(VENV) value. 38 | # Example: 39 | # codestyle: $(VENV)/pyflakes 40 | # $(VENV)/pyflakes . 41 | # See `ipython` target below for another example. 42 | # 43 | # This Makefile can be configured via following variables: 44 | # PY 45 | # Command name for system Python interpreter. It is used only initialy to 46 | # create the virtual environment 47 | # Default: python3 48 | # REQUIREMENTS_TXT 49 | # Space separated list of paths to requirements.txt files. 50 | # Paths are resolved relative to current working directory. 51 | # Default: requirements.txt 52 | # WORKDIR 53 | # Parent directory for the virtual environment. 54 | # Default: current working directory. 55 | # VENVDIR 56 | # Python virtual environment directory. 57 | # Default: $(WORKDIR)/.venv 58 | # 59 | # This Makefile was written for GNU Make and may not work with other make 60 | # implementations. 61 | # 62 | # 63 | # Copyright (c) 2019-2020 Vitaly Potyarkin 64 | # 65 | # Licensed under the Apache License, Version 2.0 66 | # 67 | # 68 | 69 | 70 | # 71 | # Configuration variables 72 | # 73 | 74 | PY?=python3 75 | WORKDIR?=. 
76 | VENVDIR?=$(WORKDIR)/.venv 77 | REQUIREMENTS_TXT?=$(wildcard requirements.txt) # Multiple paths are supported (space separated) 78 | MARKER=.initialized-with-Makefile.venv 79 | 80 | 81 | # 82 | # Internal variable resolution 83 | # 84 | 85 | VENV=$(VENVDIR)/bin 86 | EXE= 87 | # Detect windows 88 | ifeq (win32,$(shell $(PY) -c "import __future__, sys; print(sys.platform)")) 89 | VENV=$(VENVDIR)/Scripts 90 | EXE=.exe 91 | endif 92 | 93 | 94 | # 95 | # Virtual environment 96 | # 97 | 98 | .PHONY: venv 99 | venv: $(VENV)/$(MARKER) 100 | 101 | .PHONY: clean-venv 102 | clean-venv: 103 | -$(RM) -r "$(VENVDIR)" 104 | 105 | .PHONY: show-venv 106 | show-venv: venv 107 | @$(VENV)/python -c "import sys; print('Python ' + sys.version.replace('\n',''))" 108 | @$(VENV)/pip --version 109 | @echo venv: $(VENVDIR) 110 | 111 | .PHONY: debug-venv 112 | debug-venv: 113 | @$(MAKE) --version 114 | $(info PY="$(PY)") 115 | $(info REQUIREMENTS_TXT="$(REQUIREMENTS_TXT)") 116 | $(info VENVDIR="$(VENVDIR)") 117 | $(info VENVDEPENDS="$(VENVDEPENDS)") 118 | $(info WORKDIR="$(WORKDIR)") 119 | 120 | 121 | # 122 | # Dependencies 123 | # 124 | 125 | ifneq ($(strip $(REQUIREMENTS_TXT)),) 126 | VENVDEPENDS+=$(REQUIREMENTS_TXT) 127 | endif 128 | 129 | ifneq ($(wildcard setup.py),) 130 | VENVDEPENDS+=setup.py 131 | endif 132 | ifneq ($(wildcard setup.cfg),) 133 | VENVDEPENDS+=setup.cfg 134 | endif 135 | 136 | $(VENV): 137 | $(PY) -m venv $(VENVDIR) 138 | $(VENV)/python -m pip install --upgrade pip setuptools wheel 139 | 140 | $(VENV)/$(MARKER): $(VENVDEPENDS) | $(VENV) 141 | ifneq ($(strip $(REQUIREMENTS_TXT)),) 142 | $(VENV)/pip install $(foreach path,$(REQUIREMENTS_TXT),-r $(path)) 143 | endif 144 | ifneq ($(wildcard setup.py),) 145 | $(VENV)/pip install -e . 
146 | endif 147 | touch $(VENV)/$(MARKER) 148 | 149 | 150 | # 151 | # Interactive shells 152 | # 153 | 154 | .PHONY: python 155 | python: venv 156 | exec $(VENV)/python 157 | 158 | .PHONY: ipython 159 | ipython: $(VENV)/ipython 160 | exec $(VENV)/ipython 161 | 162 | .PHONY: shell 163 | shell: venv 164 | . $(VENV)/activate && exec $(notdir $(SHELL)) 165 | 166 | .PHONY: bash zsh 167 | bash zsh: venv 168 | . $(VENV)/activate && exec $@ 169 | 170 | 171 | # 172 | # Commandline tools (wildcard rule, executable name must match package name) 173 | # 174 | 175 | ifneq ($(EXE),) 176 | $(VENV)/%: $(VENV)/%$(EXE) ; 177 | .PHONY: $(VENV)/% 178 | .PRECIOUS: $(VENV)/%$(EXE) 179 | endif 180 | 181 | $(VENV)/%$(EXE): $(VENV)/$(MARKER) 182 | $(VENV)/pip install --upgrade $* 183 | touch $@ 184 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # youtube2notion 2 | 3 | > For those who prefer reading to watching 4 | 5 | ## Example 6 | 7 | | YouTube Video | Notion Page | 8 | | ------------------------------------------------- | -------------------------------------------------------------------------------------------- | 9 | | ![YouTube Video](./images/youtube_screenshot.png) | ![Notion Page](./images/notion_screenshot.png) | 10 | | https://youtu.be/REeROakzwfU | https://www.notion.so/You-Will-Never-Be-Lazy-Again-Jim-Kwik-0894d54029404df7984650be014224f4 | 11 | 12 | ## How it works 13 | 14 | 1. Get a YouTube video id, a Notion token v2, and a Notion page URL from user input 15 | 1. Download the youtube video by youtube-dl 16 | 1. Split the video into thumbnail images per second by ffmpeg 17 | 1. Download the youtube video's subtitle data by youtube_transcript_api 18 | 1. Generate a markdown formatted file with the thumbnail images and subtitle 19 | 1. 
"""HTTP API server for youtube2notion (app.py).

Exposes ``POST /upload``, which builds a ``Youtube2notion`` job from the JSON
request body and runs it.
"""
import re
from os import environ
from os.path import exists
from shutil import rmtree

import googlecloudprofiler
from flask import Flask, request

from youtube2notion.youtube2notion import Youtube2notion

app = Flask(__name__)

# The video id is interpolated into a filesystem path that is later handed to
# rmtree(), so only plain id characters are accepted: no '/', no '.', no
# whitespace. Anything else could point the cleanup at an arbitrary directory.
_VIDEO_ID_RE = re.compile(r'[A-Za-z0-9_-]+')


@app.route('/')
def index():
    """Return an empty JSON object for the root path."""
    return {}


@app.route('/upload', methods=['POST'])
def upload():
    """Run the youtube2notion pipeline for the video named in the JSON body.

    Expected JSON keys: ``video_id`` (required), ``notion_token_v2``,
    ``notion_page_url`` and ``subtitle_language``.

    Returns:
        ``{}`` on success, or ``({'msg': ...}, 400)`` when the body is not a
        JSON object, the video id is missing or malformed, or the pipeline
        raises.
    """
    # silent=True turns "missing or unparsable JSON" into None instead of an
    # exception, so every bad body gets the same JSON error shape.
    req = request.get_json(silent=True)
    if not isinstance(req, dict):
        return {'msg': 'invalid request body'}, 400

    video_id = req.get('video_id')
    notion_token_v2 = req.get('notion_token_v2')
    notion_page_url = req.get('notion_page_url')
    subtitle_language = req.get('subtitle_language')

    if not isinstance(video_id, str) or not _VIDEO_ID_RE.fullmatch(video_id):
        return {'msg': 'invalid video_id'}, 400

    output_dir = './tmp/%s/' % video_id

    y2n = Youtube2notion(
        video_id=video_id,
        output_dir=output_dir,
        notion_token_v2=notion_token_v2,
        notion_page_url=notion_page_url,
        subtitle_language=subtitle_language)

    try:
        y2n.execute()
    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # rather than letting it surface as an opaque 500.
        return {'msg': type(e).__name__ + str(e)}, 400
    finally:
        # Always remove the per-video working directory, success or failure.
        if exists(output_dir):
            rmtree(output_dir)

    return {}


def shouldProfile() -> bool:
    """Return True when the SHOULD_PROFILE environment variable is 'true'."""
    return environ.get('SHOULD_PROFILE') == 'true'


def shouldDebug() -> bool:
    """Return True when the SHOULD_DEBUG environment variable is 'true'."""
    return environ.get('SHOULD_DEBUG') == 'true'


def setUpProfiler(serviceName: str):
    """Start the Google Cloud profiler agent under the given service name."""
    googlecloudprofiler.start(service=serviceName)


def main():
    """Optionally start the profiler, then serve the app on port 5000."""
    serviceName: str = 'youtube2notion'

    if shouldProfile():
        setUpProfiler(serviceName)

    # The server binds to every interface, and Flask's debug mode enables an
    # interactive debugger that can execute code sent by a client. Debug mode
    # is therefore opt-in (SHOULD_DEBUG=true) instead of always on.
    app.run(host='0.0.0.0', port=5000, debug=shouldDebug())


if __name__ == "__main__":
    main()
/bin/sh 2 | 3 | FULL_ARGS=$@ 4 | 5 | python3.9 ${FULL_ARGS} 6 | -------------------------------------------------------------------------------- /images/notion_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taehoio/youtube2notion/a20405462f8f22b5ff491d86f5966cddf51b89b4/images/notion_screenshot.png -------------------------------------------------------------------------------- /images/youtube_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taehoio/youtube2notion/a20405462f8f22b5ff491d86f5966cddf51b89b4/images/youtube_screenshot.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | beautifulsoup4==4.9.3 3 | bs4==0.0.1 4 | cached-property==1.5.2 5 | cachetools==4.2.2 6 | certifi==2021.5.30 7 | chardet==4.0.0 8 | click==8.0.1 9 | commonmark==0.9.1 10 | coverage==5.5 11 | dictdiffer==0.8.1 12 | flake8==3.9.2 13 | Flask==2.0.1 14 | future==0.18.2 15 | google-api-core==1.30.0 16 | google-api-python-client==2.9.0 17 | google-auth==1.31.0 18 | google-auth-httplib2==0.1.0 19 | google-cloud-profiler==3.0.4 20 | googleapis-common-protos==1.53.0 21 | httplib2==0.19.1 22 | idna==2.10 23 | itsdangerous==2.0.1 24 | Jinja2==3.0.1 25 | MarkupSafe==2.0.1 26 | mccabe==0.6.1 27 | md2notion==2.4.1 28 | mistletoe==0.7.2 29 | mypy-extensions==0.4.3 30 | git+https://github.com/gnokoheat/notion-py@4fabfb3 31 | packaging==20.9 32 | pathspec==0.8.1 33 | protobuf==3.17.3 34 | pyasn1==0.4.8 35 | pyasn1-modules==0.2.8 36 | pycodestyle==2.7.0 37 | pyflakes==2.3.1 38 | pyparsing==2.4.7 39 | python-slugify==5.0.2 40 | pytz==2021.1 41 | regex==2021.4.4 42 | requests==2.25.1 43 | rsa==4.7.2 44 | six==1.16.0 45 | soupsieve==2.2.1 46 | text-unidecode==1.3 47 | toml==0.10.2 48 | 
# ---- tests/youtube2notion/test_markdown.py ----
import unittest
from youtube2notion.youtube_subtitle import SubtitleElement
from youtube2notion.markdown import Markdown


class TestMarkdown(unittest.TestCase):
    """Checks the markdown text produced for a title and two subtitles."""

    def test_generate(self):
        elements: list[SubtitleElement] = [
            SubtitleElement(text='hi', start=12.34, duration=0.567),
            SubtitleElement(text='bye', start=45.12, duration=0.890),
        ]
        # Blank-line separated: heading, then image/text pairs, with no
        # trailing newline after the last subtitle.
        expected = '\n\n'.join([
            '# Hello, World!',
            '![](./imagesimage_00012.jpeg)',
            'hi',
            '![](./imagesimage_00045.jpeg)',
            'bye',
        ])

        actual = Markdown.generate(
            title='Hello, World!',
            subtitle_elements=elements,
            images_dir='./images')

        self.assertEqual(actual, expected)


# ---- tests/youtube2notion/test_youtube2notion.py ----
from youtube2notion.youtube2notion import Youtube2notion


class TestYoutube2notion(unittest.TestCase):
    """Runs the whole pipeline once with empty Notion credentials."""

    def setUp(self):
        import warnings
        warnings.filterwarnings(
            "ignore", category=ResourceWarning, message="unclosed.*")

        video_id = 'Kc_cvAXCs4Y'
        job_kwargs = {
            'video_id': video_id,
            'output_dir': './tmp/%s/' % video_id,
            'notion_token_v2': '',
            'notion_page_url': '',
            'info_title': '',
            'info_author_name': '',
            'info_author_url': '',
        }
        self.y2n = Youtube2notion(**job_kwargs)

    def test_execute(self):
        self.y2n.execute()


# ---- tests/youtube2notion/test_youtube_info.py ----
from youtube2notion.youtube_info import YoutubeInfo


class TestYoutubeInfo(unittest.TestCase):
    """Checks that video information can be fetched for two known ids."""

    def setUp(self):
        self.video_id = '5oNcoj4G0xc'

    def test_get_information_element_1(self):
        info = YoutubeInfo.get_information_element(self.video_id)
        self.assertIsNotNone(info)

    def test_get_information_element_2(self):
        info = YoutubeInfo.get_information_element('Kc_cvAXCs4Y')
        self.assertIsNotNone(info)
-------------------------------------------------------------------------------- 1 | import unittest 2 | from youtube2notion.youtube_subtitle import YoutubeSubtitle 3 | from youtube_transcript_api._errors import TranslationLanguageNotAvailable 4 | 5 | 6 | class TestYoutubeSubtitle(unittest.TestCase): 7 | 8 | def setUp(self): 9 | import warnings 10 | warnings.filterwarnings( 11 | "ignore", 12 | category=ResourceWarning, 13 | message="unclosed.*") 14 | 15 | self.video_id = '5oNcoj4G0xc' 16 | 17 | def test_get_subtitle_elements(self): 18 | self.assertIsNotNone( 19 | YoutubeSubtitle.get_subtitle_elements(self.video_id)) 20 | 21 | def test_with_manually_created_subtitle(self): 22 | self.assertIsNotNone( 23 | YoutubeSubtitle.get_subtitle_elements('MY5SatbZMAo', ['en'])) 24 | 25 | def test_with_auto_generated_subtitle(self): 26 | self.assertIsNotNone( 27 | YoutubeSubtitle.get_subtitle_elements('5-MXyCr3y5M', ['ko'])) 28 | 29 | def test_with_auto_translated_subtitle(self): 30 | self.assertIsNotNone( 31 | YoutubeSubtitle.get_subtitle_elements('5-MXyCr3y5M', ['en'])) 32 | 33 | def test_with_translation_language_not_available_exception(self): 34 | with self.assertRaises(TranslationLanguageNotAvailable): 35 | YoutubeSubtitle.get_subtitle_elements(self.video_id, ['aa', 'bb']) 36 | -------------------------------------------------------------------------------- /tests/youtube2notion/test_youtube_video.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from youtube2notion.youtube_video import YoutubeVideo 3 | from os.path import isfile 4 | 5 | 6 | class TestYoutubeVideo(unittest.TestCase): 7 | 8 | def setUp(self): 9 | self.video_id = 'Kc_cvAXCs4Y' 10 | 11 | def test_get_output_filename(self): 12 | self.assertEqual( 13 | YoutubeVideo.get_output_filename(self.video_id, 14 | './tmp/%s/' % self.video_id), 15 | './tmp/Kc_cvAXCs4Y/Kc_cvAXCs4Y.mp4') 16 | 17 | def test_to_url(self): 18 | self.assertEqual( 19 | 
@click.command()
@click.argument('video_id')
@click.option('--output-dir', '-o', 'output_dir', required=False, type=str)
@click.option(
    '--notion-token-v2', '-t', 'notion_token_v2', required=False, type=str)
@click.option(
    '--notion-page-url', '-p', 'notion_page_url', required=False, type=str)
@click.option(
    '--subtitle_language',
    '-l',
    'subtitle_language',
    required=False,
    type=str,
    default='en')
def youtube2notion(video_id: str, output_dir, notion_token_v2, notion_page_url,
                   subtitle_language):
    """Convert the YouTube video VIDEO_ID into a Markdown document.

    The result is written below the output directory (default:
    ./tmp/VIDEO_ID/) and is handed to Youtube2notion together with the
    Notion token and page URL, when given.
    """
    if not output_dir:
        output_dir = './tmp/%s/' % video_id

    click.echo('video_id: %s' % video_id)
    click.echo('output_dir: %s' % output_dir)
    # The token is a credential: report only whether it was supplied so it
    # does not end up in terminal scrollback or captured logs.
    click.echo('notion_token_v2: %s' %
               ('***' if notion_token_v2 else notion_token_v2))
    click.echo('notion_page_url: %s' % notion_page_url)
    click.echo('subtitle_language: %s' % subtitle_language)

    y2n = Youtube2notion(
        video_id=video_id,
        output_dir=output_dir,
        notion_token_v2=notion_token_v2,
        notion_page_url=notion_page_url,
        subtitle_language=subtitle_language)

    try:
        y2n.execute()
    except Exception as e:
        # Top-level boundary: still show the message, but let click report
        # it on stderr and exit with a non-zero status instead of
        # pretending the run succeeded.
        raise click.ClickException(str(e)) from e
class Markdown:
    """Renders subtitles and their screenshots as a Markdown document."""

    @classmethod
    def generate(cls, title: str, subtitle_elements: 'list[SubtitleElement]',
                 images_dir: str) -> str:
        """Return a Markdown document for the given subtitles.

        The document starts with ``# <title>``. Each subtitle element then
        contributes an image link followed by the subtitle text, with blank
        lines between sections. The result has no trailing newline.

        Args:
            title: Text of the top-level heading.
            subtitle_elements: Objects exposing ``start``, ``duration`` and
                ``text``; ``start`` and ``duration`` are used to pick the
                screenshot taken around the middle of the subtitle.
            images_dir: Prefix put in front of each image file name. It is
                concatenated as-is, so include the trailing separator.

        Returns:
            The Markdown text.
        """
        sections = ['# ' + title]

        for element in subtitle_elements:
            midpoint_sec = int(element.start + (element.duration / 2))
            # ffmpeg numbers image-sequence output starting at 1, so
            # image_00000.jpeg is never written; clamp early subtitles to the
            # first available screenshot instead of emitting a dead link.
            frame_number = max(midpoint_sec, 1)
            image_link = '![](%simage_%05d.jpeg)' % (images_dir, frame_number)
            sections.append(image_link + '\n\n' + element.text)

        return '\n\n'.join(sections)
class Youtube2notion:
    """Builds a Markdown page from a YouTube video and optionally uploads it.

    ``execute`` fetches the subtitles and video information, downloads the
    video, takes screenshots, writes a Markdown file that pairs subtitles
    with screenshots, and uploads that file to Notion when both a token and
    a page URL were provided.
    """

    def __init__(
        self,
        video_id: str,
        output_dir: str = '',
        notion_token_v2: str = '',
        notion_page_url: str = '',
        subtitle_language: str = 'ko',
        info_title: str = '',
        info_author_name: str = '',
        info_author_url: str = '',
    ):
        """Store the settings for one conversion run.

        Args:
            video_id: YouTube video id.
            output_dir: Directory prefix for all generated files. Paths are
                built by plain string concatenation, so it must be empty or
                end with a path separator.
            notion_token_v2: Notion ``token_v2`` value; upload is skipped
                when empty.
            notion_page_url: URL of the Notion page that receives the new
                child page; upload is skipped when empty.
            subtitle_language: Language code requested for the subtitles.
            info_title: Initial title; replaced by the fetched one in
                ``execute``.
            info_author_name: Initial author name; replaced likewise.
            info_author_url: Initial author URL; replaced likewise.
        """
        self.video_id = video_id
        self.output_dir = output_dir
        self.images_output_dir = self.output_dir + 'images/'

        self.notion_token_v2 = notion_token_v2
        self.notion_page_url = notion_page_url

        self.subtitle_language = subtitle_language

        self.info_title = info_title
        self.info_author_name = info_author_name
        self.info_author_url = info_author_url

    def _download_video(self) -> str:
        """Download the video into ``output_dir`` and return its filename."""
        return YoutubeVideo.download(self.video_id, self.output_dir)

    def _get_subtitle_elements(self) -> 'list[SubtitleElement]':
        """Fetch the subtitles in ``subtitle_language``."""
        return YoutubeSubtitle.get_subtitle_elements(self.video_id,
                                                     [self.subtitle_language])

    def _get_info_element(self) -> 'InformationElement':
        """Fetch the video information, store it on ``self`` and return it."""
        info_element = YoutubeInfo.get_information_element(self.video_id)
        self.info_title = info_element.title
        self.info_author_name = info_element.author_name
        self.info_author_url = info_element.author_url
        # Returned so the method honours its annotation; execute() only
        # relies on the attributes set above.
        return info_element

    def _take_screenshots(self, input_filename: str):
        """Create the images directory and fill it with video screenshots."""
        Path(self.images_output_dir).mkdir(parents=True, exist_ok=True)

        Ffmpeg.take_screenshots(input_filename, self.images_output_dir)

    def _generage_markdown(self, title: str,
                           subtitle_elements: 'list[SubtitleElement]',
                           images_dir: str) -> str:
        """Render the Markdown text (thin wrapper around Markdown.generate)."""
        return Markdown.generate(
            title,
            subtitle_elements,
            images_dir,
        )

    def _write_markdown_file(self, md: str, output_markdown_filename: str):
        """Write ``md`` to ``output_markdown_filename`` as UTF-8.

        The upload step reads the file back as UTF-8, so it is written with
        the same explicit encoding instead of the platform default. The
        context manager closes the file even when the write fails.
        """
        with open(output_markdown_filename, 'w', encoding='utf-8') as f:
            f.write(md)

    def _should_upload_to_notion(self) -> bool:
        """Return True only when both the Notion token and page URL are set."""
        return bool(self.notion_token_v2 and self.notion_page_url)

    def _upload_to_notion(self, md_file: str, notion_token_v2: str,
                          notion_page_url: str):
        """Upload ``md_file`` as a new child page of ``notion_page_url``.

        The new page is titled ``<info_title>(<subtitle_language>)``.
        """
        client = NotionClient(token_v2=notion_token_v2)
        page = client.get_block(notion_page_url)

        with open(md_file, 'r', encoding='utf-8') as f:
            new_page = page.children.add_new(
                PageBlock,
                title=self.info_title + '(' + self.subtitle_language + ')')
            upload(f, new_page)

    def execute(self):
        """Run the whole pipeline for ``video_id``."""
        # Subtitles are fetched first, so a video without usable subtitles
        # fails before anything is downloaded.
        subtitle_elements = self._get_subtitle_elements()
        self._get_info_element()

        downloaded_video_filename = self._download_video()
        self._take_screenshots(downloaded_video_filename)

        # Image links are relative to the Markdown file, which is written
        # next to the images directory.
        md = self._generage_markdown(
            title=self.info_title + '(' + self.subtitle_language + ')',
            subtitle_elements=subtitle_elements,
            images_dir='./images/')

        md_filename = self.output_dir + self.video_id + '.md'
        self._write_markdown_file(md, md_filename)

        if self._should_upload_to_notion():
            self._upload_to_notion(
                md_filename,
                notion_token_v2=self.notion_token_v2,
                notion_page_url=self.notion_page_url)
class YoutubeInfo:
    """Looks up a video's title and author via YouTube's oEmbed endpoint."""

    # Seconds to wait for the oEmbed endpoint. Without a timeout
    # requests.get can block indefinitely on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    @classmethod
    def get_information_element(cls, video_id: str) -> InformationElement:
        """Return the title and author information for ``video_id``.

        The lookup is best-effort. If the request fails, times out, answers
        with a status other than 200 or returns a body that is not JSON, an
        element whose title is the video id is returned instead.

        Args:
            video_id: YouTube video id.

        Returns:
            An InformationElement; its title is never None.
        """
        params = {
            "format": "json",
            "url": "https://www.youtube.com/watch?v=%s" % video_id
        }

        try:
            res = requests.get(
                "https://www.youtube.com/oembed",
                params=params,
                timeout=cls.REQUEST_TIMEOUT)
        except requests.RequestException:
            return InformationElement(title=video_id)

        if res.status_code != 200:
            return InformationElement(title=video_id)

        try:
            res_json = res.json()
        except ValueError:
            # The JSON decode errors raised by requests derive from
            # ValueError.
            return InformationElement(title=video_id)

        # A missing key must not surface as None: the title is later
        # concatenated with other strings.
        return InformationElement(
            title=res_json.get('title') or video_id,
            author_name=res_json.get('author_name') or '',
            author_url=res_json.get('author_url') or '',
        )
class YoutubeVideo:
    """Downloads a YouTube video with youtube_dl and builds related names."""

    # youtube video format code 134
    #
    # extension: mp4
    # resolution: 640x360
    # note: 360p, 87k, mp4_dash container, avc1.4d401e@ 87k, 25fps,
    #       video only
    FORMAT_CODE = '134'

    @classmethod
    def download(cls, video_id: str, output_dir: str = '') -> str:
        """Download the video and return the filename it was written to.

        Args:
            video_id: YouTube video id.
            output_dir: Prefix for the output file; concatenated as-is.

        Returns:
            The output filename, ``output_dir + video_id + '.mp4'``.
        """
        target_filename: str = YoutubeVideo.get_output_filename(
            video_id, output_dir, '.mp4')
        video_url = YoutubeVideo.to_url(video_id)

        downloader_options = dict(
            quiet=True,
            format=cls.FORMAT_CODE,
            writethumbnail=True,
            writeinfojson=True,
            outtmpl=target_filename,
        )

        with youtube_dl.YoutubeDL(downloader_options) as downloader:
            downloader.download([video_url])

        return target_filename

    @staticmethod
    def get_output_filename(video_id: str,
                            output_dir: str,
                            extention: str = '.mp4') -> str:
        """Return ``output_dir``, ``video_id`` and ``extention`` joined."""
        return ''.join((output_dir, video_id, extention))

    @staticmethod
    def to_url(video_id: str) -> str:
        """Return the short youtu.be URL for ``video_id``."""
        url_prefix = 'https://youtu.be/'
        return url_prefix + video_id