├── test ├── __init__.py ├── testdata │ ├── certificate │ │ ├── ca.srl │ │ ├── ca.key │ │ ├── client.key │ │ ├── clientencrypted.key │ │ ├── client.csr │ │ ├── client.crt │ │ ├── ca.crt │ │ ├── clientwithkey.crt │ │ ├── instructions.md │ │ └── clientwithencryptedkey.crt │ ├── thumbnails │ │ └── foo %d bar │ │ │ └── foo_%d.webp │ ├── yt_dlp_plugins │ │ ├── extractor │ │ │ ├── _ignore.py │ │ │ ├── normal.py │ │ │ └── ignore.py │ │ └── postprocessor │ │ │ └── normal.py │ ├── zipped_plugins │ │ └── yt_dlp_plugins │ │ │ ├── extractor │ │ │ └── zipped.py │ │ │ └── postprocessor │ │ │ └── zipped.py │ ├── cookies │ │ ├── session_cookies.txt │ │ ├── httponly_cookies.txt │ │ └── malformed_cookies.txt │ ├── f4m │ │ └── custom_base_url.f4m │ ├── xspf │ │ └── foo_xspf.xspf │ └── mpd │ │ └── float_duration.mpd ├── test_netrc.py ├── test_youtube_misc.py ├── test_update.py.disabled ├── test_iqiyi_sdk_interpreter.py ├── parameters.json ├── test_age_restriction.py ├── versions.json └── test_cache.py ├── yt-dlp.cmd ├── devscripts ├── logo.ico ├── SizeOfImage.patch ├── __init__.py ├── SizeOfImage_w.patch ├── fish-completion.in ├── run_tests.sh ├── run_tests.bat ├── make_supportedsites.py ├── make_contributing.py ├── bash-completion.in ├── changelog_override.json ├── zsh-completion.in ├── bash-completion.py ├── set-variant.py ├── update-formulae.py ├── generate_aes_testdata.py ├── lazy_load_template.py ├── utils.py └── zsh-completion.py ├── yt_dlp ├── __pyinstaller │ ├── __init__.py │ └── hook-yt_dlp.py ├── version.py ├── extractor │ ├── sharevideos.py │ ├── engadget.py │ ├── ufctv.py │ ├── gigya.py │ ├── sibnet.py │ ├── usanetwork.py │ ├── m6.py │ ├── extractors.py │ ├── cinemax.py │ ├── videodetective.py │ ├── myvidster.py │ ├── formula1.py │ ├── nrl.py │ ├── outsidetv.py │ ├── ku6.py │ ├── vodpl.py │ ├── cliprs.py │ ├── kth.py │ ├── fuyintv.py │ ├── freespeech.py │ ├── savefrom.py │ ├── streamff.py │ ├── bibeltv.py │ ├── teachingchannel.py │ ├── lumni.py │ ├── ebaumsworld.py │ ├── 
kompas.py │ ├── people.py │ ├── cableav.py │ ├── nerdcubed.py │ ├── kommunetv.py │ ├── cam4.py │ ├── nonktube.py │ ├── lovehomeporn.py │ ├── gputechconf.py │ ├── maoritv.py │ ├── nuevo.py │ ├── unity.py │ ├── hentaistigma.py │ ├── googlesearch.py │ ├── hypergryph.py │ ├── atscaleconf.py │ ├── defense.py │ ├── adobeconnect.py │ ├── rottentomatoes.py │ ├── bandaichannel.py │ ├── syvdk.py │ ├── lci.py │ ├── europeantour.py │ ├── bfi.py │ ├── ebay.py │ ├── helsinki.py │ ├── echomsk.py │ ├── restudy.py │ ├── commonmistakes.py │ ├── moviezine.py │ ├── stretchinternet.py │ ├── bild.py │ ├── worldstarhiphop.py │ ├── breitbart.py │ ├── thestar.py │ ├── bundesliga.py │ ├── howcast.py │ ├── vh1.py │ ├── nzz.py │ ├── miaopai.py │ ├── academicearth.py │ ├── cozytv.py │ ├── hgtv.py │ ├── __init__.py │ ├── glide.py │ ├── yourupload.py │ ├── skylinewebcams.py │ ├── fox9.py │ ├── trunews.py │ ├── theholetv.py │ ├── filmweb.py │ ├── xbef.py │ ├── uktvplay.py │ ├── ehow.py │ ├── livejournal.py │ ├── oktoberfesttv.py │ ├── odatv.py │ ├── masters.py │ ├── ruhd.py │ ├── mychannels.py │ ├── thisamericanlife.py │ ├── tvland.py │ ├── goshgay.py │ ├── hypem.py │ ├── caltrans.py │ ├── livestreamfails.py │ ├── tvnoe.py │ ├── historicfilms.py │ ├── aliexpress.py │ ├── sztvhu.py │ ├── behindkink.py │ ├── vodplatform.py │ ├── webcamerapl.py │ ├── dreisat.py │ ├── lenta.py │ ├── weiqitv.py │ ├── megaphone.py │ └── reverbnation.py ├── __main__.py ├── compat │ ├── _deprecated.py │ ├── imghdr.py │ ├── functools.py │ └── shutil.py ├── postprocessor │ ├── __init__.py │ └── exec.py ├── downloader │ ├── fc2.py │ └── rtsp.py └── dependencies │ └── Cryptodome.py ├── yt-dlp.sh ├── .gitattributes ├── .editorconfig ├── requirements.txt ├── pyproject.toml ├── MANIFEST.in ├── .github ├── ISSUE_TEMPLATE │ └── config.yml ├── FUNDING.yml └── workflows │ ├── quick-test.yml │ ├── core.yml │ ├── release-nightly.yml │ └── download.yml ├── setup.cfg ├── LICENSE ├── public.key └── .gitignore /test/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yt-dlp.cmd: -------------------------------------------------------------------------------- 1 | @py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* 2 | -------------------------------------------------------------------------------- /test/testdata/certificate/ca.srl: -------------------------------------------------------------------------------- 1 | 4A260C33C4D34612646E6321E1E767DF1A95EF0B 2 | -------------------------------------------------------------------------------- /devscripts/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ping/yt-dlp/master/devscripts/logo.ico -------------------------------------------------------------------------------- /devscripts/SizeOfImage.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ping/yt-dlp/master/devscripts/SizeOfImage.patch -------------------------------------------------------------------------------- /devscripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file needed to make devscripts.utils properly importable from outside 2 | -------------------------------------------------------------------------------- /devscripts/SizeOfImage_w.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ping/yt-dlp/master/devscripts/SizeOfImage_w.patch -------------------------------------------------------------------------------- /yt_dlp/__pyinstaller/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_hook_dirs(): 5 | return [os.path.dirname(__file__)] 6 | 
-------------------------------------------------------------------------------- /yt-dlp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" 3 | -------------------------------------------------------------------------------- /test/testdata/thumbnails/foo %d bar/foo_%d.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ping/yt-dlp/master/test/testdata/thumbnails/foo %d bar/foo_%d.webp -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | Makefile* text whitespace=-tab-in-indent 4 | *.sh text eol=lf 5 | *.md diff=markdown 6 | *.py diff=python 7 | -------------------------------------------------------------------------------- /test/testdata/yt_dlp_plugins/extractor/_ignore.py: -------------------------------------------------------------------------------- 1 | from yt_dlp.extractor.common import InfoExtractor 2 | 3 | 4 | class IgnorePluginIE(InfoExtractor): 5 | pass 6 | -------------------------------------------------------------------------------- /devscripts/fish-completion.in: -------------------------------------------------------------------------------- 1 | 2 | {{commands}} 3 | 4 | 5 | complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" 6 | -------------------------------------------------------------------------------- /test/testdata/yt_dlp_plugins/postprocessor/normal.py: -------------------------------------------------------------------------------- 1 | from yt_dlp.postprocessor.common import PostProcessor 2 | 3 | 4 | class NormalPluginPP(PostProcessor): 5 | pass 6 | 
-------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [**.py] 4 | charset = utf-8 5 | indent_size = 4 6 | indent_style = space 7 | trim_trailing_whitespace = true 8 | insert_final_newline = true 9 | -------------------------------------------------------------------------------- /test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py: -------------------------------------------------------------------------------- 1 | from yt_dlp.extractor.common import InfoExtractor 2 | 3 | 4 | class ZippedPluginIE(InfoExtractor): 5 | pass 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mutagen 2 | pycryptodomex 3 | websockets 4 | brotli; platform_python_implementation=='CPython' 5 | brotlicffi; platform_python_implementation!='CPython' 6 | certifi 7 | -------------------------------------------------------------------------------- /test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py: -------------------------------------------------------------------------------- 1 | from yt_dlp.postprocessor.common import PostProcessor 2 | 3 | 4 | class ZippedPluginPP(PostProcessor): 5 | pass 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = 'setuptools.build_meta' 3 | # https://github.com/yt-dlp/yt-dlp/issues/5941 4 | # https://github.com/pypa/distutils/issues/17 5 | requires = ['setuptools > 50'] 6 | -------------------------------------------------------------------------------- /test/testdata/yt_dlp_plugins/extractor/normal.py: -------------------------------------------------------------------------------- 1 | 
from yt_dlp.extractor.common import InfoExtractor 2 | 3 | 4 | class NormalPluginIE(InfoExtractor): 5 | pass 6 | 7 | 8 | class _IgnoreUnderscorePluginIE(InfoExtractor): 9 | pass 10 | -------------------------------------------------------------------------------- /yt_dlp/version.py: -------------------------------------------------------------------------------- 1 | # Autogenerated by devscripts/update-version.py 2 | 3 | __version__ = '2023.03.04' 4 | 5 | RELEASE_GIT_HEAD = '392389b7df7b818f794b231f14dc396d4875fbad' 6 | 7 | VARIANT = None 8 | 9 | UPDATE_HINT = None 10 | 11 | CHANNEL = 'stable' 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include Changelog.md 3 | include LICENSE 4 | include README.md 5 | include completions/*/* 6 | include supportedsites.md 7 | include yt-dlp.1 8 | include requirements.txt 9 | recursive-include devscripts * 10 | recursive-include test * 11 | -------------------------------------------------------------------------------- /test/testdata/certificate/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PRIVATE KEY----- 2 | MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49 3 | AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH 4 | YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg== 5 | -----END EC PRIVATE KEY----- 6 | -------------------------------------------------------------------------------- /test/testdata/certificate/client.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PRIVATE KEY----- 2 | MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 3 | AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird 4 | m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== 5 | -----END EC PRIVATE KEY----- 6 | 
-------------------------------------------------------------------------------- /test/testdata/yt_dlp_plugins/extractor/ignore.py: -------------------------------------------------------------------------------- 1 | from yt_dlp.extractor.common import InfoExtractor 2 | 3 | 4 | class IgnoreNotInAllPluginIE(InfoExtractor): 5 | pass 6 | 7 | 8 | class InAllPluginIE(InfoExtractor): 9 | pass 10 | 11 | 12 | __all__ = ['InAllPluginIE'] 13 | -------------------------------------------------------------------------------- /yt_dlp/extractor/sharevideos.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class ShareVideosEmbedIE(InfoExtractor): 5 | _VALID_URL = False 6 | _EMBED_REGEX = [r']+?\bsrc\s*=\s*(["\'])(?P(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1'] 7 | -------------------------------------------------------------------------------- /test/testdata/cookies/session_cookies.txt: -------------------------------------------------------------------------------- 1 | # Netscape HTTP Cookie File 2 | # http://curl.haxx.se/rfc/cookie_spec.html 3 | # This is a generated file! Do not edit. 4 | 5 | www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue 6 | www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value 7 | -------------------------------------------------------------------------------- /test/testdata/cookies/httponly_cookies.txt: -------------------------------------------------------------------------------- 1 | # Netscape HTTP Cookie File 2 | # http://curl.haxx.se/rfc/cookie_spec.html 3 | # This is a generated file! Do not edit. 
4 | 5 | #HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE 6 | www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE 7 | -------------------------------------------------------------------------------- /test/testdata/certificate/clientencrypted.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PRIVATE KEY----- 2 | Proc-Type: 4,ENCRYPTED 3 | DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 4 | 5 | 96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS 6 | rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn 7 | IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= 8 | -----END EC PRIVATE KEY----- 9 | -------------------------------------------------------------------------------- /devscripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | if [ -z "$1" ]; then 4 | test_set='test' 5 | elif [ "$1" = 'core' ]; then 6 | test_set="-m not download" 7 | elif [ "$1" = 'download' ]; then 8 | test_set="-m download" 9 | else 10 | echo 'Invalid test type "'"$1"'". Use "core" | "download"' 11 | exit 1 12 | fi 13 | 14 | python3 -bb -Werror -m pytest "$test_set" 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Get help from the community on Discord 4 | url: https://discord.gg/H5MNcFW63r 5 | about: Join the yt-dlp Discord for community-powered support! 
6 | - name: Matrix Bridge to the Discord server 7 | url: https://matrix.to/#/#yt-dlp:matrix.org 8 | about: For those who do not want to use Discord 9 | -------------------------------------------------------------------------------- /test/testdata/certificate/client.csr: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE REQUEST----- 2 | MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG 3 | SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq 4 | 3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA 5 | 1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq 6 | 6kiYq8Oxx6ZMoI+11k75/Kip1g== 7 | -----END CERTIFICATE REQUEST----- 8 | -------------------------------------------------------------------------------- /test/testdata/cookies/malformed_cookies.txt: -------------------------------------------------------------------------------- 1 | # Netscape HTTP Cookie File 2 | # http://curl.haxx.se/rfc/cookie_spec.html 3 | # This is a generated file! Do not edit. 4 | 5 | # Cookie file entry with invalid number of fields - 6 instead of 7 6 | www.foobar.foobar FALSE / FALSE 0 COOKIE 7 | 8 | # Cookie file entry with invalid expires at 9 | www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE 10 | -------------------------------------------------------------------------------- /devscripts/run_tests.bat: -------------------------------------------------------------------------------- 1 | @setlocal 2 | @echo off 3 | cd /d %~dp0.. 4 | 5 | if ["%~1"]==[""] ( 6 | set "test_set="test"" 7 | ) else if ["%~1"]==["core"] ( 8 | set "test_set="-m not download"" 9 | ) else if ["%~1"]==["download"] ( 10 | set "test_set="-m "download"" 11 | ) else ( 12 | echo.Invalid test type "%~1". 
Use "core" ^| "download" 13 | exit /b 1 14 | ) 15 | 16 | set PYTHONWARNINGS=error 17 | pytest %test_set% 18 | -------------------------------------------------------------------------------- /yt_dlp/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Execute with 4 | # $ python -m yt_dlp 5 | 6 | import sys 7 | 8 | if __package__ is None and not getattr(sys, 'frozen', False): 9 | # direct call of __main__.py 10 | import os.path 11 | path = os.path.realpath(os.path.abspath(__file__)) 12 | sys.path.insert(0, os.path.dirname(os.path.dirname(path))) 13 | 14 | import yt_dlp 15 | 16 | if __name__ == '__main__': 17 | yt_dlp.main() 18 | -------------------------------------------------------------------------------- /test/testdata/certificate/client.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG 3 | A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow 4 | FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA 5 | BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS 6 | XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD 7 | aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY 8 | D0dB8M1kJw== 9 | -----END CERTIFICATE----- 10 | -------------------------------------------------------------------------------- /yt_dlp/compat/_deprecated.py: -------------------------------------------------------------------------------- 1 | """Deprecated - New code should avoid these""" 2 | 3 | import base64 4 | import urllib.error 5 | import urllib.parse 6 | 7 | compat_str = str 8 | 9 | compat_b64decode = base64.b64decode 10 | 11 | compat_HTTPError = urllib.error.HTTPError 12 | compat_urlparse = urllib.parse 13 | compat_parse_qs = urllib.parse.parse_qs 14 | compat_urllib_parse_unquote = urllib.parse.unquote 15 | 
compat_urllib_parse_urlencode = urllib.parse.urlencode 16 | compat_urllib_parse_urlparse = urllib.parse.urlparse 17 | -------------------------------------------------------------------------------- /yt_dlp/extractor/engadget.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class EngadgetIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' 6 | 7 | _TESTS = [{ 8 | # video with vidible ID 9 | 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', 10 | 'only_matching': True, 11 | }] 12 | 13 | def _real_extract(self, url): 14 | video_id = self._match_id(url) 15 | return self.url_result('aol-video:%s' % video_id) 16 | -------------------------------------------------------------------------------- /yt_dlp/extractor/ufctv.py: -------------------------------------------------------------------------------- 1 | from .imggaming import ImgGamingBaseIE 2 | 3 | 4 | class UFCTVIE(ImgGamingBaseIE): 5 | _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com' 6 | _NETRC_MACHINE = 'ufctv' 7 | _REALM = 'ufc' 8 | 9 | 10 | class UFCArabiaIE(ImgGamingBaseIE): 11 | _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)' 12 | _NETRC_MACHINE = 'ufcarabia' 13 | _REALM = 'admufc' 14 | -------------------------------------------------------------------------------- /devscripts/make_supportedsites.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Allow direct execution 4 | import os 5 | import sys 6 | 7 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 8 | 9 | 10 | from devscripts.utils import get_filename_args, write_file 11 | from yt_dlp.extractor import list_extractor_classes 12 | 13 | 14 | def main(): 15 | out = 
'\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False) 16 | write_file(get_filename_args(), f'# Supported sites\n{out}\n') 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /test/testdata/certificate/ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw 3 | FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz 4 | MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D 5 | AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU 6 | KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F 7 | iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud 8 | EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2 9 | m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1 10 | -----END CERTIFICATE----- 11 | -------------------------------------------------------------------------------- /yt_dlp/compat/imghdr.py: -------------------------------------------------------------------------------- 1 | tests = { 2 | 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', 3 | 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', 4 | 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), 5 | 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), 6 | } 7 | 8 | 9 | def what(file=None, h=None): 10 | """Detect format of image (Currently supports jpeg, png, webp, gif only) 11 | Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py 12 | """ 13 | if h is None: 14 | with open(file, 'rb') as f: 15 | h = f.read(12) 16 | return next((type_ for type_, test in tests.items() if test(h)), None) 17 | -------------------------------------------------------------------------------- /yt_dlp/extractor/gigya.py: 
-------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | from ..utils import ( 4 | ExtractorError, 5 | urlencode_postdata, 6 | ) 7 | 8 | 9 | class GigyaBaseIE(InfoExtractor): 10 | def _gigya_login(self, auth_data): 11 | auth_info = self._download_json( 12 | 'https://accounts.eu1.gigya.com/accounts.login', None, 13 | note='Logging in', errnote='Unable to log in', 14 | data=urlencode_postdata(auth_data)) 15 | 16 | error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') 17 | if error_message: 18 | raise ExtractorError( 19 | 'Unable to login: %s' % error_message, expected=True) 20 | return auth_info 21 | -------------------------------------------------------------------------------- /yt_dlp/extractor/sibnet.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class SibnetEmbedIE(InfoExtractor): 5 | # Ref: https://help.sibnet.ru/?sibnet_video_embed 6 | _VALID_URL = False 7 | _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1'] 8 | _WEBPAGE_TESTS = [{ 9 | 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', 10 | 'info_dict': { 11 | 'id': 'shell', # FIXME? 
12 | 'ext': 'mp4', 13 | 'age_limit': 0, 14 | 'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg', 15 | 'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat', 16 | } 17 | }] 18 | -------------------------------------------------------------------------------- /test/testdata/certificate/clientwithkey.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG 3 | A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow 4 | FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA 5 | BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS 6 | XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD 7 | aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY 8 | D0dB8M1kJw== 9 | -----END CERTIFICATE----- 10 | -----BEGIN EC PRIVATE KEY----- 11 | MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 12 | AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird 13 | m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== 14 | -----END EC PRIVATE KEY----- 15 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | 
otechie: # Replace with a single Otechie username 12 | 13 | custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators'] 14 | -------------------------------------------------------------------------------- /yt_dlp/compat/functools.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F405 2 | from functools import * # noqa: F403 3 | 4 | from .compat_utils import passthrough_module 5 | 6 | passthrough_module(__name__, 'functools') 7 | del passthrough_module 8 | 9 | try: 10 | cache # >= 3.9 11 | except NameError: 12 | cache = lru_cache(maxsize=None) 13 | 14 | try: 15 | cached_property # >= 3.8 16 | except NameError: 17 | class cached_property: 18 | def __init__(self, func): 19 | update_wrapper(self, func) 20 | self.func = func 21 | 22 | def __get__(self, instance, _): 23 | if instance is None: 24 | return self 25 | setattr(instance, self.func.__name__, self.func(instance)) 26 | return getattr(instance, self.func.__name__) 27 | -------------------------------------------------------------------------------- /test/testdata/certificate/instructions.md: -------------------------------------------------------------------------------- 1 | # Generate certificates for client cert tests 2 | 3 | ## CA 4 | ```sh 5 | openssl ecparam -name prime256v1 -genkey -noout -out ca.key 6 | openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest" 7 | ``` 8 | 9 | ## Client 10 | ```sh 11 | openssl ecparam -name prime256v1 -genkey -noout -out client.key 12 | openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256 13 | openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2" 14 | openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256 15 | cp client.crt clientwithkey.crt 16 | cp client.crt clientwithencryptedkey.crt 17 | cat client.key >> clientwithkey.crt 18 | cat clientencrypted.key >> 
clientwithencryptedkey.crt 19 | ``` -------------------------------------------------------------------------------- /test/test_netrc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Allow direct execution 4 | import os 5 | import sys 6 | import unittest 7 | 8 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | 11 | from yt_dlp.extractor import gen_extractor_classes 12 | from yt_dlp.extractor.common import InfoExtractor 13 | 14 | NO_LOGIN = InfoExtractor._perform_login 15 | 16 | 17 | class TestNetRc(unittest.TestCase): 18 | def test_netrc_present(self): 19 | for ie in gen_extractor_classes(): 20 | if ie._perform_login is NO_LOGIN: 21 | continue 22 | self.assertTrue( 23 | ie._NETRC_MACHINE, 24 | 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) 25 | 26 | 27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /test/testdata/certificate/clientwithencryptedkey.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG 3 | A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow 4 | FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA 5 | BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS 6 | XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD 7 | aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY 8 | D0dB8M1kJw== 9 | -----END CERTIFICATE----- 10 | -----BEGIN EC PRIVATE KEY----- 11 | Proc-Type: 4,ENCRYPTED 12 | DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 13 | 14 | 96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS 15 | rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn 16 | IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= 
17 | -----END EC PRIVATE KEY----- 18 | -------------------------------------------------------------------------------- /yt_dlp/extractor/usanetwork.py: -------------------------------------------------------------------------------- 1 | from .nbc import NBCIE 2 | 3 | 4 | class USANetworkIE(NBCIE): # XXX: Do not subclass from concrete IE 5 | _VALID_URL = r'https?(?P://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P\d+))' 6 | _TESTS = [{ 7 | 'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302', 8 | 'info_dict': { 9 | 'id': '4185302', 10 | 'ext': 'mp4', 11 | 'title': 'Intelligence (Trailer)', 12 | 'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.', 13 | 'upload_date': '20200715', 14 | 'timestamp': 1594785600, 15 | 'uploader': 'NBCU-MPAT', 16 | }, 17 | 'params': { 18 | # m3u8 download 19 | 'skip_download': True, 20 | }, 21 | }] 22 | -------------------------------------------------------------------------------- /devscripts/make_contributing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import optparse 4 | import re 5 | 6 | 7 | def main(): 8 | return # This is unused in yt-dlp 9 | 10 | parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') 11 | options, args = parser.parse_args() 12 | if len(args) != 2: 13 | parser.error('Expected an input and an output filename') 14 | 15 | infile, outfile = args 16 | 17 | with open(infile, encoding='utf-8') as inf: 18 | readme = inf.read() 19 | 20 | bug_text = re.search( 21 | r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) 22 | dev_text = re.search( 23 | r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1) 24 | 25 | out = bug_text + dev_text 26 | 27 | with open(outfile, 'w', encoding='utf-8') as outf: 28 | outf.write(out) 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | 
class M6IE(InfoExtractor):
    IE_NAME = 'm6'
    # The numeric part of the page name is the video id (see the test below).
    # The group name had been stripped (`(?P\d+)` is not a valid pattern); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'

    _TEST = {
        'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
        'md5': '242994a87de2c316891428e0176bcb77',
        'info_dict': {
            'id': '11323908',
            'ext': 'mp4',
            'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
            'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
            'duration': 100,
        }
    }

    def _real_extract(self, url):
        """Hand the video over to the SixPlay extractor through a `6play:<id>` URL."""
        video_id = self._match_id(url)
        return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)
import contextlib
import os

from ..plugins import load_plugins

# NB: Must be before other imports so that plugins can be correctly injected
_PLUGIN_CLASSES = load_plugins('extractor', 'IE')

# Becomes True only when the lazy_extractors module could be imported
_LAZY_LOADER = False
# Setting YTDLP_NO_LAZY_EXTRACTORS to any non-empty value skips the lazy module entirely
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
    # A missing lazy_extractors module is not an error; the full import below is the fallback
    with contextlib.suppress(ImportError):
        from .lazy_extractors import *  # noqa: F403
        from .lazy_extractors import _ALL_CLASSES
        _LAZY_LOADER = True

if not _LAZY_LOADER:
    from ._extractors import *  # noqa: F403
    # Collect every module-level name ending in 'IE'; GenericIE is held back so it can go last
    _ALL_CLASSES = [  # noqa: F811
        klass
        for name, klass in globals().items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    _ALL_CLASSES.append(GenericIE)  # noqa: F405

# Expose plugin classes as module attributes and put them at the front of the class list
globals().update(_PLUGIN_CLASSES)
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()

from .common import _PLUGIN_OVERRIDES  # noqa: F401
# flake8: noqa: F405
from shutil import *  # noqa: F403

from .compat_utils import passthrough_module

# NOTE(review): presumably makes this module fall back to the real `shutil` for names
# not defined here - confirm in compat_utils
passthrough_module(__name__, 'shutil')
del passthrough_module


import sys

# The overrides below are only defined on FreeBSD; elsewhere the names come from `shutil`
if sys.platform.startswith('freebsd'):
    import errno
    import os
    import shutil

    # Workaround for PermissionError when using restricted ACL mode on FreeBSD
    def copy2(src, dst, *args, **kwargs):
        """Copy `src` to `dst` including metadata, tolerating an EPERM from the metadata copy.

        If `dst` is a directory the file is placed inside it under the basename of `src`.
        Returns the final destination path.
        """
        if os.path.isdir(dst):
            dst = os.path.join(dst, os.path.basename(src))
        shutil.copyfile(src, dst, *args, **kwargs)
        try:
            shutil.copystat(src, dst, *args, **kwargs)
        except PermissionError as e:
            # Only EPERM is swallowed; any other PermissionError is re-raised
            if e.errno != getattr(errno, 'EPERM', None):
                raise
        return dst

    def move(*args, copy_function=copy2, **kwargs):
        """Call `shutil.move` with the tolerant `copy2` above as the default copy function."""
        return shutil.move(*args, copy_function=copy_function, **kwargs)
#compdef yt-dlp

# Completion function template for yt-dlp.
# {{fileopts}}, {{diropts}} and {{flags}} are placeholders to be substituted before use
# (presumably by devscripts/zsh-completion.py - confirm).
__yt_dlp() {
    local curcontext="$curcontext" fileopts diropts cur prev
    typeset -A opt_args
    fileopts="{{fileopts}}"
    diropts="{{diropts}}"
    cur=$words[CURRENT]
    case $cur in
        :)
            # The current word is exactly ":" - offer the special keyword names
            _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
        ;;
        *)
            # Otherwise choose the completion from the previous word
            prev=$words[CURRENT-1]
            if [[ ${prev} =~ ${fileopts} ]]; then
                # previous option takes a file name
                _path_files
            elif [[ ${prev} =~ ${diropts} ]]; then
                # previous option takes a directory
                _path_files -/
            elif [[ ${prev} == "--remux-video" ]]; then
                _arguments '*: :(mp4 mkv)'
            elif [[ ${prev} == "--recode-video" ]]; then
                _arguments '*: :(mp4 flv ogg webm mkv)'
            else
                # fall back to the list of all flags
                _arguments '*: :({{flags}})'
            fi
        ;;
    esac
}

__yt_dlp
BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"


def build_completion(opt_parser):
    """Render the bash completion script from its template.

    Collects one option string per option of every option group in
    `opt_parser`, joins them with spaces, substitutes the result for the
    `{{flags}}` placeholder in BASH_COMPLETION_TEMPLATE and writes the
    outcome to BASH_COMPLETION_FILE. Both paths are relative to the
    current working directory.
    """
    opts_flag = []
    for group in opt_parser.option_groups:
        # get_opt_string() gives the option's long form when it has one
        opts_flag.extend(option.get_opt_string() for option in group.option_list)

    # Explicit encoding: the result must not depend on the locale of the build machine
    with open(BASH_COMPLETION_TEMPLATE, encoding="utf-8") as f:
        template = f.read()
    with open(BASH_COMPLETION_FILE, "w", encoding="utf-8") as f:
        # just using the special char
        f.write(template.replace("{{flags}}", " ".join(opts_flag)))
'md5:7d8427d6d02c4fbcef50fe269980c749', 13 | 'upload_date': '20141027', 14 | 'uploader': 'utkualp', 15 | 'ext': 'mp4', 16 | 'age_limit': 18, 17 | }, 18 | 'add_ie': ['XHamster'], 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | return self.url_result(self._html_search_regex( 26 | r'rel="videolink" href="(?P.*)">', 27 | webpage, 'real video url')) 28 | -------------------------------------------------------------------------------- /test/testdata/f4m/custom_base_url.f4m: -------------------------------------------------------------------------------- 1 | 2 | 3 | recorded 4 | http://vod.livestream.com/events/0000000000673980/ 5 | 269.293 6 | AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA= 7 | 8 | AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ== 9 | 10 | 11 | -------------------------------------------------------------------------------- /yt_dlp/extractor/formula1.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class Formula1IE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P\d+)\.html' 6 | _TEST = { 7 | 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html', 8 | 'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8', 9 | 'info_dict': { 10 | 'id': '6060988138001', 11 | 'ext': 'mp4', 12 | 'title': 'Race highlights - Spain 2016', 13 | 'timestamp': 1463332814, 14 | 'upload_date': '20160515', 15 | 'uploader_id': '6057949432001', 16 | }, 17 | 'add_ie': 
class NRLTVIE(InfoExtractor):
    # The last path component is used as the display id.
    # The group name had been stripped (`(?P[^/?&#]+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
        'info_dict': {
            'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
            'ext': 'mp4',
            'title': 'Match Highlights: Titans v Knights',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the `q-data` JSON attribute from the page and delegate to the Ooyala extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        q_data = self._parse_json(self._html_search_regex(
            r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
        # 'videoId' is mandatory (KeyError if absent); the title is optional
        ooyala_id = q_data['videoId']
        return self.url_result(
            'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
def pycryptodome_module():
    """Return the name of the crypto package to bundle.

    'Cryptodome' is returned when it imports, and also when neither package
    is importable. 'Crypto' is returned (with a warning on stderr) only when
    it is importable and Cryptodome is not.
    """
    try:
        import Cryptodome  # noqa: F401
        return 'Cryptodome'
    except ImportError:
        pass

    try:
        import Crypto  # noqa: F401
    except ImportError:
        # Neither package is present: keep the preferred name
        return 'Cryptodome'

    print('WARNING: Using Crypto since Cryptodome is not available. '
          'Install with: pip install pycryptodomex', file=sys.stderr)
    return 'Crypto'
class TestYoutubeMisc(unittest.TestCase):
    def test_youtube_extract(self):
        """`YoutubeIE.extract_id` must return the same video id for every input form below."""
        expected_id = 'BaW_jenozKc'
        inputs = (
            'http://www.youtube.com/watch?&v=BaW_jenozKc',
            'https://www.youtube.com/watch?&v=BaW_jenozKc',
            'https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc',
            'https://www.youtube.com/watch_popup?v=BaW_jenozKc',
            'http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930',
            'BaW_jenozKc',
        )
        for url in inputs:
            self.assertEqual(YoutubeIE.extract_id(url), expected_id)

<h1 title=.*>(.*?)</h1>

class VODPlIE(OnetBaseIE):
    # The last alphanumeric path component is the video id (see the test below).
    # The group name had been stripped (`(?P[0-9a-zA-Z]+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
        'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
        'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
        'info_dict': {
            'id': '3ep3jns',
            'ext': 'mp4',
            'title': 'Chłopaki nie płaczą',
            'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
            'timestamp': 1463415154,
            'duration': 5765,
            'upload_date': '20160516',
        },
    }, {
        'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract through the OnetBaseIE helpers, then report the URL's id as the video id."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
        # Override whatever id the base helper produced with the id taken from the URL
        info_dict['id'] = video_id
        return info_dict
class ClipRsIE(OnetBaseIE):
    # The slug before the trailing number serves as the display id.
    # The group name had been stripped (`(?P[^/]+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
    _TEST = {
        'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
        'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
        'info_dict': {
            'id': '1488842.1399140381',
            'ext': 'mp4',
            'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
            'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
            'duration': 229,
            'timestamp': 1459850243,
            'upload_date': '20160405',
        }
    }

    def _real_extract(self, url):
        """Extract through the OnetBaseIE helpers and attach the URL slug as `display_id`."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        mvp_id = self._search_mvp_id(webpage)

        info_dict = self._extract_from_id(mvp_id, webpage)
        info_dict['display_id'] = display_id

        return info_dict
def property_setter(name, value):
    """Return a callable that rewrites the `name = ...` assignment in a source string.

    The returned object is a `functools.partial` of `re.sub`; calling it with
    the text of a Python file replaces every line that starts with
    `name = <anything>` by `name = <repr(value)>` and returns the new text.

    `name` is interpolated into the pattern unescaped, so it should be a
    plain identifier.
    """
    pattern = rf'(?m)^{name}\s*=\s*.+$'
    replacement = f'{name} = {value!r}'
    # The replacement is supplied through a function so it is inserted verbatim.
    # Passed as a string it would be treated as a regex template: a `\n` inside
    # repr(value) would become a real newline and `\\` would collapse to `\`,
    # corrupting the generated assignment.
    return functools.partial(re.sub, pattern, lambda _match: replacement)
class FuyinTVIE(InfoExtractor):
    # The second numeric component (the page name) is the video id (see the test below).
    # The group name had been stripped (`(?P\d+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'https://www.fuyin.tv/html/2733/44129.html',
        'info_dict': {
            'id': '44129',
            'ext': 'mp4',
            'title': '第1集',
            'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
        }
    }]

    def _real_extract(self, url):
        """Fetch the media URL and title from the site API; the description comes from the page."""
        video_id = self._match_id(url)
        json_data = self._download_json(
            'https://www.fuyin.tv/api/api/tv.movie/url',
            video_id, query={'urlid': f'{video_id}'})
        # The page is only needed for the optional description, hence fatal=False
        webpage = self._download_webpage(url, video_id, fatal=False)

        return {
            'id': video_id,
            'title': traverse_obj(json_data, ('data', 'title')),
            'url': json_data['data']['url'],
            'ext': 'mp4',
            # NOTE(review): a failed non-fatal download presumably yields a falsy non-string;
            # fall back to '' so the meta search runs on a string - confirm against common.py
            'description': self._html_search_meta('description', webpage or ''),
        }
'freespeechtv', 18 | 'uploader': 'freespeechtv', 19 | }, 20 | } 21 | 22 | def _real_extract(self, url): 23 | display_id = self._match_id(url) 24 | webpage = self._download_webpage(url, display_id) 25 | youtube_url = self._search_regex( 26 | r'data-video-url="([^"]+)"', 27 | webpage, 'youtube url') 28 | 29 | return self.url_result(youtube_url, YoutubeIE.ie_key()) 30 | -------------------------------------------------------------------------------- /yt_dlp/extractor/savefrom.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class SaveFromIE(InfoExtractor): 7 | IE_NAME = 'savefrom.net' 8 | _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P.*)$' 9 | 10 | _TEST = { 11 | 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com', 12 | 'info_dict': { 13 | 'id': 'UlVRAPW2WJY', 14 | 'ext': 'mp4', 15 | 'title': 'About Team Radical MMA | MMA Fighting', 16 | 'upload_date': '20120816', 17 | 'uploader': 'Howcast', 18 | 'uploader_id': 'Howcast', 19 | 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. 
class StreamFFIE(InfoExtractor):
    # The alphanumeric component after /v/ is the video id (see the test below).
    # The group name had been stripped (`(?P[a-zA-Z0-9]+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'https://streamff.com/v/55cc94',
        'md5': '8745a67bb5e5c570738efe7983826370',
        'info_dict': {
            'id': '55cc94',
            'ext': 'mp4',
            'title': '55cc94',
            'timestamp': 1634764643,
            'upload_date': '20211020',
            'view_count': int,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict from the site's JSON API for this video id."""
        video_id = self._match_id(url)
        json_data = self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id)
        return {
            'id': video_id,
            # Fall back to the id when the API gives no (or an empty) name
            'title': json_data.get('name') or video_id,
            # 'videoLink' is mandatory (KeyError if absent); it is appended to the site root
            'url': 'https://streamff.com/%s' % json_data['videoLink'],
            'view_count': int_or_none(json_data.get('views')),
            'timestamp': parse_iso8601(json_data.get('date')),
        }
class TeachingChannelIE(InfoExtractor):
    # The path component after /video(s)/ is used as the display id.
    # The group name had been stripped (`(?P[^/?&#]+)` does not compile); restored to `id`,
    # the group `_match_id` is expected to return.
    _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'

    _TEST = {
        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
        'info_dict': {
            'id': '3swwlzkT',
            'ext': 'mp4',
            'title': 'A History of Teaming',
            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
            'duration': 422,
            'upload_date': '20170316',
            'timestamp': 1489691297,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['JWPlatform'],
    }

    def _real_extract(self, url):
        """Find the 8-character media id in the page and delegate to the JWPlatform extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The id follows either a `data-mid=` attribute or an `id="jw-video-player-` prefix
        mid = self._search_regex(
            r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
            webpage, 'media id')

        return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)
| 5 | [flake8] 6 | exclude = build,venv,.tox,.git,.pytest_cache 7 | ignore = E402,E501,E731,E741,W503 8 | max_line_length = 120 9 | per_file_ignores = 10 | devscripts/lazy_load_template.py: F401 11 | 12 | 13 | [autoflake] 14 | ignore-init-module-imports = true 15 | ignore-pass-after-docstring = true 16 | remove-all-unused-imports = true 17 | remove-duplicate-keys = true 18 | remove-unused-variables = true 19 | 20 | 21 | [tool:pytest] 22 | addopts = -ra -v --strict-markers 23 | markers = 24 | download 25 | 26 | 27 | [tox:tox] 28 | skipsdist = true 29 | envlist = py{36,37,38,39,310,311},pypy{36,37,38,39} 30 | skip_missing_interpreters = true 31 | 32 | [testenv] # tox 33 | deps = 34 | pytest 35 | commands = pytest {posargs:"-m not download"} 36 | passenv = HOME # For test_compat_expanduser 37 | setenv = 38 | # PYTHONWARNINGS = error # Catches PIP's warnings too 39 | 40 | 41 | [isort] 42 | py_version = 37 43 | multi_line_output = VERTICAL_HANGING_INDENT 44 | line_length = 80 45 | reverse_relative = true 46 | ensure_newline_before_comments = true 47 | include_trailing_comma = true 48 | known_first_party = 49 | test 50 | -------------------------------------------------------------------------------- /yt_dlp/extractor/lumni.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from .francetv import FranceTVIE 3 | 4 | 5 | class LumniIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P[\w-]+)' 7 | _TESTS = [{ 8 | 'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle', 9 | 'md5': '960e8240c4f2c7a20854503a71e52f5e', 10 | 'info_dict': { 11 | 'id': 'd2b9a4e5-a526-495b-866c-ab72737e3645', 12 | 'ext': 'mp4', 13 | 'title': "L'homme et son environnement dans la révolution industrielle - L'ère de l'homme", 14 | 'thumbnail': 'https://assets.webservices.francetelevisions.fr/v1/assets/images/a7/17/9f/a7179f5f-63a5-4e11-8d4d-012ab942d905.jpg', 15 | 
class EbaumsWorldIE(InfoExtractor):
    """Extractor for single video pages on ebaumsworld.com."""

    # The trailing numeric path component is the video id; it must be captured
    # in a group named ``id`` because ``_match_id`` looks that group up.
    _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
        'info_dict': {
            'id': '83367677',
            'ext': 'mp4',
            'title': 'A Giant Python Opens The Door',
            'description': 'This is how nightmares start...',
            'uploader': 'jihadpizza',
        },
    }

    def _real_extract(self, url):
        """Fetch the player XML for the video in *url* and return its info dict."""
        video_id = self._match_id(url)
        config = self._download_xml(
            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)

        def optional_text(tag):
            # Purely descriptive metadata may be missing from the XML; yield
            # None for it instead of failing the whole extraction.
            node = config.find(tag)
            return None if node is None else node.text

        return {
            'id': video_id,
            # Title and media URL are mandatory: a missing element still raises.
            'title': config.find('title').text,
            'url': config.find('file').text,
            'description': optional_text('description'),
            'thumbnail': optional_text('image'),
            'uploader': optional_text('username'),
        }
TestUpdate(unittest.TestCase): 17 | def test_rsa_verify(self): 18 | UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) 19 | with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'versions.json'), 'rb') as f: 20 | versions_info = f.read().decode() 21 | versions_info = json.loads(versions_info) 22 | signature = versions_info['signature'] 23 | del versions_info['signature'] 24 | self.assertTrue(rsa_verify( 25 | json.dumps(versions_info, sort_keys=True).encode(), 26 | signature, UPDATES_RSA_KEY)) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /yt_dlp/extractor/kompas.py: -------------------------------------------------------------------------------- 1 | from .jixie import JixieBaseIE 2 | 3 | 4 | class KompasVideoIE(JixieBaseIE): 5 | _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P\d+)/(?P[\w-]+)' 6 | _TESTS = [{ 7 | 'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel', 8 | 'info_dict': { 9 | 'id': '164474', 10 | 'ext': 'mp4', 11 | 'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel', 12 | 'description': 'md5:262530c4fb7462398235f9a5dba92456', 13 | 'uploader_id': '9262bf2590d558736cac4fff7978fcb1', 14 | 'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel', 15 | 'duration': 85.066667, 16 | 'categories': ['news'], 17 | 'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg', 18 | 'tags': 'count:9', 19 | } 20 | }] 21 | 22 | def _real_extract(self, url): 23 | video_id, display_id = self._match_valid_url(url).group('id', 'slug') 24 | webpage = self._download_webpage(url, display_id) 25 | 26 | return self._extract_data_from_jixie_id(display_id, 
class WarningLogger:
    """Logger stand-in that keeps warnings and discards every other level."""

    def __init__(self):
        # Warning texts, in the order they were reported.
        self.messages = []

    def warning(self, msg):
        """Record *msg* so the test can inspect it afterwards."""
        self.messages += [msg]

    def debug(self, msg):
        """Drop debug output."""

    def error(self, msg):
        """Drop error output."""
class CableAVIE(InfoExtractor):
    """Extractor for video pages on cableav.tv."""

    # The alphanumeric path component is the video id; ``_match_id`` needs it
    # in a group named ``id``.
    _VALID_URL = r'https://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://cableav.tv/lS4iR9lWjN8/',
        'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
        'info_dict': {
            'id': 'lS4iR9lWjN8',
            'ext': 'mp4',
            'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
            'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for *url*, built from the page's Open Graph tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The Open Graph video URL is handed to the m3u8 parser as the manifest.
        manifest_url = self._og_search_video_url(webpage, secure=False)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4'),
        }
class KommunetvIE(InfoExtractor):
    """Extractor for archived streams on <municipality>.kommunetv.no."""

    # Dots in the host are escaped so they only match a literal '.', and the
    # archive number is captured as ``id`` for ``_match_id``.
    _VALID_URL = r'https://(\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
    _TEST = {
        'url': 'https://oslo.kommunetv.no/archive/921',
        'md5': '5f102be308ee759be1e12b63d5da4bbc',
        'info_dict': {
            'id': '921',
            'title': 'Bystyremøte',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Query the streams API for the archive id in *url* and return its info dict."""
        video_id = self._match_id(url)
        # NOTE(review): the API host is fixed to oslo.kommunetv.no even though
        # _VALID_URL accepts any subdomain - confirm this works for other
        # municipalities before changing it.
        data = self._download_json(
            'https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id,
            video_id, headers={'Accept': 'application/json'})
        title = data['stream']['title']
        playlist_file = data['playlist'][0]['playlist'][0]['file']
        # Drop query string and fragment before using the file as the manifest URL.
        manifest_url = update_url(playlist_file, query=None, fragment=None)
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        return {
            'id': video_id,
            'formats': formats,
            'title': title
        }
# Command line: <formula file> <version>; anything else fails the unpacking.
filename, version = sys.argv[1:]

# PyPI uses the normalized form without zero padding ("2023.01.06" -> "2023.1.6").
normalized_version = '.'.join(str(int(x)) for x in version.split('.'))

# Close the HTTP response deterministically instead of leaking the connection.
with urllib.request.urlopen(
        'https://pypi.org/pypi/yt-dlp/%s/json' % normalized_version) as response:
    pypi_release = json.loads(response.read().decode())

# The formula is built from the source distribution of the release.
tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz'))

sha256sum = tarball_file['digests']['sha256']
url = tarball_file['url']

formulae_text = read_file(filename)

# Only the first occurrence of each field belongs to the package itself.
formulae_text = re.sub(r'sha256 "[0-9a-f]*?"', 'sha256 "%s"' % sha256sum, formulae_text, count=1)
formulae_text = re.sub(r'url "[^"]*?"', 'url "%s"' % url, formulae_text, count=1)

write_file(filename, formulae_text)
class GPUTechConfIE(InfoExtractor):
    """Extractor for GTC 2015 on-demand session pages."""

    # The digits after the leading 'S' form the session id; ``_match_id``
    # needs them in a group named ``id``.
    _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
        'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
        'info_dict': {
            'id': '5156',
            'ext': 'mp4',
            'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
            'duration': 1219,
        }
    }

    def _real_extract(self, url):
        """Locate the session's XML descriptor and delegate to DigitallySpeaking."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # rootPath is optional in the page; fall back to the known event base URL.
        root_path = self._search_regex(
            r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
            default='http://evt.dispeak.com/nvidia/events/gtc15/')
        xml_file_id = self._search_regex(
            r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': '%sxml/%s.xml' % (root_path, xml_file_id),
            'ie_key': 'DigitallySpeaking',
        }
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.github/workflows/core.yml: -------------------------------------------------------------------------------- 1 | name: Core Tests 2 | on: [push, pull_request] 3 | permissions: 4 | contents: read 5 | 6 | jobs: 7 | tests: 8 | name: Core Tests 9 | if: "!contains(github.event.head_commit.message, 'ci skip')" 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-latest] 15 | # CPython 3.11 is in quick-test 16 | python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8] 17 | run-tests-ext: [sh] 18 | include: 19 | # atleast one of each CPython/PyPy tests must be in windows 20 | - os: windows-latest 21 | python-version: '3.7' 22 | run-tests-ext: bat 23 | - os: windows-latest 24 | python-version: pypy-3.9 25 | run-tests-ext: bat 26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Install pytest 33 | run: pip install pytest 34 | - name: Run tests 35 | continue-on-error: False 36 | run: | 37 | python3 -m yt_dlp -v || true # Print debug head 38 | ./devscripts/run_tests.${{ matrix.run-tests-ext }} core 39 | -------------------------------------------------------------------------------- /yt_dlp/extractor/maoritv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class MaoriTVIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P[^/?&#]+)' 6 | _TEST = { 7 | 'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54', 8 | 'md5': '5ade8ef53851b6a132c051b1cd858899', 9 | 'info_dict': { 10 | 'id': '4774724855001', 11 | 'ext': 'mp4', 12 | 'title': 'Kōrero Mai, Series 1 Episode 54', 13 | 
class NuevoBaseIE(InfoExtractor):
    """Shared logic for sites that expose a Nuevo player XML config."""

    def _extract_nuevo(self, config_url, video_id, headers=None):
        """Download the Nuevo XML config and turn it into an info dict.

        config_url -- URL of the XML configuration document
        video_id   -- fallback id, used when the config has no ``mediaid``
        headers    -- optional extra HTTP headers for the config request

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration`` and
        ``formats``. A missing title is fatal.
        """
        # A fresh dict per call avoids sharing one mutable default between calls.
        config = self._download_xml(
            config_url, video_id, transform_source=lambda s: s.strip(),
            headers={} if headers is None else headers)

        title = xpath_text(config, './title', 'title', fatal=True).strip()
        video_id = xpath_text(config, './mediaid', default=video_id)
        thumbnail = xpath_text(config, ['./image', './thumb'])
        duration = float_or_none(xpath_text(config, './duration'))

        # 'file' carries the standard rendition, 'filehd' the HD one; either may
        # be absent, in which case it is simply left out.
        candidates = (
            (xpath_text(config, element_name), format_id)
            for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')))
        formats = [
            {'url': video_url, 'format_id': format_id}
            for video_url, format_id in candidates if video_url]
        self._check_formats(formats, video_id)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
class UnityIE(InfoExtractor):
    """Extractor for unity3d.com tutorial pages that embed a YouTube video."""

    # The last path component is the tutorial slug; ``_match_id`` needs it in a
    # group named ``id``.
    _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
        'info_dict': {
            'id': 'jWuNtik0C8E',
            'ext': 'mp4',
            'title': 'Live Training 22nd September 2014 - Animate Anything',
            'description': 'md5:e54913114bd45a554c56cdde7669636e',
            'duration': 2893,
            'uploader': 'Unity',
            'uploader_id': 'Unity3D',
            'upload_date': '20140926',
        }
    }, {
        'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the embedded YouTube id on the page and hand it to YoutubeIE."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        youtube_id = self._search_regex(
            r'data-video-id="([_0-9a-zA-Z-]+)"',
            webpage, 'youtube ID')
        return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)
r']+class="posttitle"[^>]*>]*>([^<]+)', 24 | webpage, 'title') 25 | wrap_url = self._html_search_regex( 26 | r']+src="([^"]+mp4)"', webpage, 'wrapper url') 27 | wrap_webpage = self._download_webpage(wrap_url, video_id) 28 | 29 | video_url = self._html_search_regex( 30 | r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url') 31 | 32 | return { 33 | 'id': video_id, 34 | 'url': video_url, 35 | 'title': title, 36 | 'age_limit': 18, 37 | } 38 | -------------------------------------------------------------------------------- /yt_dlp/extractor/googlesearch.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import re 3 | 4 | from .common import SearchInfoExtractor 5 | 6 | 7 | class GoogleSearchIE(SearchInfoExtractor): 8 | IE_DESC = 'Google Video search' 9 | IE_NAME = 'video.google:search' 10 | _SEARCH_KEY = 'gvsearch' 11 | _TESTS = [{ 12 | 'url': 'gvsearch15:python language', 13 | 'info_dict': { 14 | 'id': 'python language', 15 | 'title': 'python language', 16 | }, 17 | 'playlist_count': 15, 18 | }] 19 | _PAGE_SIZE = 100 20 | 21 | def _search_results(self, query): 22 | for pagenum in itertools.count(): 23 | webpage = self._download_webpage( 24 | 'http://www.google.com/search', f'gvsearch:{query}', 25 | note=f'Downloading result page {pagenum + 1}', 26 | query={ 27 | 'tbm': 'vid', 28 | 'q': query, 29 | 'start': pagenum * self._PAGE_SIZE, 30 | 'num': self._PAGE_SIZE, 31 | 'hl': 'en', 32 | }) 33 | 34 | for url in re.findall(r']* class="dXiKIc"[^>]*>\d+)' 7 | _TESTS = [{ 8 | 'url': 'https://monster-siren.hypergryph.com/music/514562', 9 | 'info_dict': { 10 | 'id': '514562', 11 | 'ext': 'wav', 12 | 'artist': ['塞壬唱片-MSR'], 13 | 'album': 'Flame Shadow', 14 | 'title': 'Flame Shadow', 15 | } 16 | }] 17 | 18 | def _real_extract(self, url): 19 | audio_id = self._match_id(url) 20 | webpage = self._download_webpage(url, audio_id) 21 | json_data = self._search_json( 22 | r'window\.g_initialProps\s*=', webpage, 'data', audio_id, 
def openssl_encode(algo, key, iv):
    """Encrypt the module-level ``secret_msg`` with the ``openssl enc`` tool.

    algo -- cipher name understood by openssl, e.g. ``'aes-128-cbc'``
    key  -- key as a list of integers (hex-encoded via ``hex_str``)
    iv   -- initialisation vector as a list of integers

    Returns the raw ciphertext bytes written by openssl.
    Raises subprocess.CalledProcessError when openssl exits with an error, so a
    failed run cannot silently produce empty or truncated test data.
    """
    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
    completed = subprocess.run(
        cmd, input=secret_msg, stdout=subprocess.PIPE, check=True)
    return completed.stdout
class AtScaleConfEventIE(InfoExtractor):
    """Extractor for @Scale conference event pages, returned as a playlist."""

    # The event slug must be captured in a group named ``id`` for ``_match_id``.
    _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'

    _TESTS = [{
        'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
        'playlist_mincount': 13,
        'info_dict': {
            'id': 'data-scale-spring-2022',
            'title': 'Data @Scale Spring 2022',
            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
        },
    }, {
        'url': 'https://atscaleconference.com/events/video-scale-2021/',
        'playlist_mincount': 14,
        'info_dict': {
            'id': 'video-scale-2021',
            'title': 'Video @Scale 2021',
            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
        },
    }]

    def _real_extract(self, url):
        """Collect every talk link on the event page into one playlist."""
        # Named event_id rather than ``id`` to avoid shadowing the builtin.
        event_id = self._match_id(url)
        webpage = self._download_webpage(url, event_id)

        talk_urls = re.findall(
            r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"',
            webpage)
        return self.playlist_from_matches(
            talk_urls, ie='Generic', playlist_id=event_id,
            title=self._og_search_title(webpage),
            description=self._og_search_description(webpage))
class AdobeConnectIE(InfoExtractor):
    """Extractor for Adobe Connect recordings and live meetings."""

    # The meeting path component must be captured in a group named ``id`` for
    # ``_match_id``.
    _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'

    def _real_extract(self, url):
        """Build one RTMP format per connection string found in the SWF URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_extract_title(webpage)
        # All stream parameters are carried in the query string of the SWF URL.
        swf_url = self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url')
        qs = compat_parse_qs(swf_url.split('?')[1])
        is_live = qs.get('isLive', ['false'])[0] == 'true'
        # 'conStrings' is a comma-separated list of alternative connection URLs.
        formats = [{
            'format_id': con_string.split('://')[0],
            'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
            'ext': 'flv',
            'play_path': 'mp4:' + qs['streamName'][0],
            'rtmp_conn': 'S:' + qs['ticket'][0],
            'rtmp_live': is_live,
            'url': con_string,
        } for con_string in qs['conStrings'][0].split(',')]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'is_live': is_live,
        }
import extract_attributes 3 | 4 | 5 | class BandaiChannelIE(BrightcoveNewBaseIE): 6 | IE_NAME = 'bandaichannel' 7 | _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' 8 | _TESTS = [{ 9 | 'url': 'https://www.b-ch.com/titles/514/001', 10 | 'md5': 'a0f2d787baa5729bed71108257f613a4', 11 | 'info_dict': { 12 | 'id': '6128044564001', 13 | 'ext': 'mp4', 14 | 'title': 'メタルファイターMIKU 第1話', 15 | 'timestamp': 1580354056, 16 | 'uploader_id': '5797077852001', 17 | 'upload_date': '20200130', 18 | 'duration': 1387.733, 19 | }, 20 | 'params': { 21 | 'skip_download': True, 22 | }, 23 | }] 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | webpage = self._download_webpage(url, video_id) 28 | attrs = extract_attributes(self._search_regex( 29 | r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player')) 30 | bc = self._download_json( 31 | 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'], 32 | video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc'] 33 | return self._parse_brightcove_metadata(bc, bc['id']) 34 | -------------------------------------------------------------------------------- /yt_dlp/extractor/syvdk.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import traverse_obj 3 | 4 | 5 | class SYVDKIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P[\w-]+)' 7 | 8 | _TESTS = [{ 9 | 'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2', 10 | 'md5': '429ce5a423dd4b1e1d0bf3a569558089', 11 | 'info_dict': { 12 | 'id': '12215', 13 | 'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2', 14 | 'ext': 'mp3', 15 | 'title': 'Isabella Arendt stiller op for De Konservative', 16 | 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06' 17 | } 18 | }] 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 
# These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'}
# Module-wide flag: the fallback warning below is written at most once per process.
_WARNED = False


class LazyLoadMetaClass(type):
    """Metaclass that forwards unknown class attributes to the real extractor class."""

    def __getattr__(cls, name):
        # Reached only when normal lookup of ``name`` on the lazy class fails.
        global _WARNED
        # Warn only if the real class has not been loaded yet, the attribute is
        # not one of the silently allowed names, and no warning was emitted before.
        if ('_real_class' not in cls.__dict__
                and name not in ALLOWED_CLASSMETHODS and not _WARNED):
            _WARNED = True
            write_string('WARNING: Falling back to normal extractor since lazy extractor '
                         f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
        # Accessing ``real_class`` imports the real extractor on first use.
        return getattr(cls.real_class, name)


class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
    """Placeholder class that imports the real extractor only when it is needed.

    Subclasses are expected to define ``_module`` (the module path to import);
    the real class is looked up there under this class's own ``__name__``.
    """

    @classproperty
    def real_class(cls):
        """Import (once) and return the real extractor class for this placeholder."""
        # Check ``cls.__dict__`` rather than ``hasattr`` so that a value cached
        # on a parent class is not mistaken for this class's own real class.
        if '_real_class' not in cls.__dict__:
            cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
        return cls._real_class

    def __new__(cls, *args, **kwargs):
        """Instantiate the real extractor class instead of the lazy placeholder."""
        instance = cls.real_class.__new__(cls.real_class)
        # ``__init__`` is invoked explicitly here; Python only calls it
        # automatically when ``__new__`` returns an instance of ``cls``.
        # NOTE(review): presumably the real class is not a subclass of the
        # placeholder, which is why the explicit call is needed - confirm.
        instance.__init__(*args, **kwargs)
        return instance
class EuropeanTourIE(InfoExtractor):
    """Resolve DP World Tour video pages to the embedded Brightcove player."""

    # The capture group must be named ``id``: ``_match_id`` reads that group.
    _VALID_URL = r'https?://(?:www\.)?europeantour\.com/dpworld-tour/news/video/(?P<id>[^/&?#$]+)'

    _TESTS = [{
        'url': 'https://www.europeantour.com/dpworld-tour/news/video/the-best-shots-of-the-2021-seasons/',
        'info_dict': {
            'id': '6287788195001',
            'ext': 'mp4',
            'title': 'The best shots of the 2021 seasons',
            'duration': 2416.512,
            'timestamp': 1640010141,
            'uploader_id': '5136026580001',
            'tags': ['prod-imported'],
            'thumbnail': 'md5:fdac52bc826548860edf8145ee74e71a',
            'upload_date': '20211220'
        },
        'params': {'skip_download': True}
    }]

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    # Account used when the page does not expose an "ACCOUNT_ID" value.
    _DEFAULT_ACCOUNT_ID = '5136026580001'

    # Group 1: Brightcove video id (required).
    # Group 2: account id found after the player tag (optional, so that the
    # default above is actually reachable; the greedy ``.*`` keeps the original
    # behaviour of picking the last "ACCOUNT_ID" on the page).
    _PLAYER_RE = re.compile(
        r'(?s)brightcove-player\s?video-id="([^"]+)"(?:.*"ACCOUNT_ID":"([^"]+)")?')

    def _real_extract(self, url):
        """Return a ``BrightcoveNew`` url result for the video embedded in the page.

        Raises the extractor's regular "unable to extract" error when the
        Brightcove player markup is missing, instead of failing with an
        ``AttributeError`` on a ``None`` match object.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id, account_id = self._search_regex(
            self._PLAYER_RE, webpage, 'brightcove player', group=(1, 2))
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % (account_id or self._DEFAULT_ACCOUNT_ID, video_id),
            'BrightcoveNew')
class EbayIE(InfoExtractor):
    """Extract the product video attached to an eBay item listing."""

    # The capture group must be named ``id``: ``_match_id`` reads that group.
    _VALID_URL = r'https?://(?:www\.)?ebay\.com/itm/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.ebay.com/itm/194509326719',
        'info_dict': {
            'id': '194509326719',
            'ext': 'mp4',
            'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands',
        },
        'params': {'skip_download': 'm3u8'}
    }]

    def _real_extract(self, url):
        """Collect HLS/DASH formats from the ``"video"`` JSON object in the item page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_json = self._search_json(r'"video":', webpage, 'video json', video_id)

        formats = []
        # ``playlistMap`` maps a protocol name to its manifest URL.  The loop
        # variable is deliberately not called ``url`` so that the page URL
        # passed to this method is not silently overwritten.
        for key, manifest_url in video_json['playlistMap'].items():
            if key == 'HLS':
                formats.extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
            elif key == 'DASH':
                formats.extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
            else:
                self.report_warning(f'Unsupported format {key}', video_id)

        return {
            'id': video_id,
            'title': remove_end(self._html_extract_title(webpage), ' | eBay'),
            'formats': formats
        }
'20258', 12 | 'ext': 'mp4', 13 | 'title': 'Tietotekniikkafoorumi-iltapäivä', 14 | 'description': 'md5:f5c904224d43c133225130fe156a5ee0', 15 | }, 16 | 'params': { 17 | 'skip_download': True, # RTMP 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | params = self._parse_json(self._html_search_regex( 26 | r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);', 27 | webpage, 'player code'), video_id, transform_source=js_to_json) 28 | formats = [{ 29 | 'url': s['file'], 30 | 'ext': 'mp4', 31 | } for s in params['sources']] 32 | 33 | return { 34 | 'id': video_id, 35 | 'title': self._og_search_title(webpage).replace('Video: ', ''), 36 | 'description': self._og_search_description(webpage), 37 | 'formats': formats, 38 | } 39 | -------------------------------------------------------------------------------- /yt_dlp/extractor/echomsk.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class EchoMskIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P\d+)' 8 | _TEST = { 9 | 'url': 'http://www.echo.msk.ru/sounds/1464134.html', 10 | 'md5': '2e44b3b78daff5b458e4dbc37f191f7c', 11 | 'info_dict': { 12 | 'id': '1464134', 13 | 'ext': 'mp3', 14 | 'title': 'Особое мнение - 29 декабря 2014, 19:08', 15 | }, 16 | } 17 | 18 | def _real_extract(self, url): 19 | video_id = self._match_id(url) 20 | 21 | webpage = self._download_webpage(url, video_id) 22 | 23 | audio_url = self._search_regex( 24 | r'', webpage, 'audio URL') 25 | 26 | title = self._html_search_regex( 27 | r'([^<]+)', 28 | webpage, 'title') 29 | 30 | air_date = self._html_search_regex( 31 | r'(?s)
(.+?)
class RestudyIE(InfoExtractor):
    """Extract restudy.dk lesson videos via their per-video SMIL manifest."""

    # The capture group must be named ``id``: ``_match_id`` reads that group.
    _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.restudy.dk/video/play/id/1637',
        'info_dict': {
            'id': '1637',
            'ext': 'flv',
            'title': 'Leiden-frosteffekt',
            'description': 'Denne video er et eksperiment med flydende kvælstof.',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return title/description from OpenGraph tags and formats from the SMIL file."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The title is required, so ask for a proper extraction error rather
        # than calling ``.strip()`` on a missing value.
        title = self._og_search_title(webpage, fatal=True).strip()
        # The description is optional: the OpenGraph lookup may yield None,
        # which must not be stripped.
        description = self._og_search_description(webpage)
        if description is not None:
            description = description.strip()

        formats = self._extract_smil_formats(
            'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
            video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
| "forcethumbnail": false, 9 | "forcetitle": false, 10 | "forceurl": false, 11 | "force_write_download_archive": false, 12 | "format": "b/bv", 13 | "ignoreerrors": false, 14 | "listformats": null, 15 | "logtostderr": false, 16 | "matchtitle": null, 17 | "max_downloads": null, 18 | "overwrites": null, 19 | "nopart": false, 20 | "noprogress": false, 21 | "outtmpl": "%(id)s.%(ext)s", 22 | "password": null, 23 | "playliststart": 1, 24 | "prefer_free_formats": false, 25 | "quiet": false, 26 | "ratelimit": null, 27 | "rejecttitle": null, 28 | "retries": 10, 29 | "simulate": false, 30 | "subtitleslang": null, 31 | "subtitlesformat": "best", 32 | "test": true, 33 | "updatetime": true, 34 | "usenetrc": false, 35 | "username": null, 36 | "verbose": true, 37 | "writedescription": false, 38 | "writeinfojson": true, 39 | "writeannotations": false, 40 | "writelink": false, 41 | "writeurllink": false, 42 | "writewebloclink": false, 43 | "writedesktoplink": false, 44 | "writesubtitles": false, 45 | "allsubtitles": false, 46 | "listsubtitles": false, 47 | "fixup": "never", 48 | "allow_playlist_files": false 49 | } 50 | -------------------------------------------------------------------------------- /yt_dlp/extractor/commonmistakes.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import ExtractorError 3 | 4 | 5 | class CommonMistakesIE(InfoExtractor): 6 | IE_DESC = False # Do not list 7 | _VALID_URL = r'(?:url|URL|yt-dlp)$' 8 | 9 | _TESTS = [{ 10 | 'url': 'url', 11 | 'only_matching': True, 12 | }, { 13 | 'url': 'URL', 14 | 'only_matching': True, 15 | }] 16 | 17 | def _real_extract(self, url): 18 | msg = ( 19 | 'You\'ve asked yt-dlp to download the URL "%s". ' 20 | 'That doesn\'t make any sense. ' 21 | 'Simply remove the parameter in your command or configuration.' 
22 | ) % url 23 | if not self.get_param('verbose'): 24 | msg += ' Add -v to the command line to see what arguments and configuration yt-dlp has' 25 | raise ExtractorError(msg, expected=True) 26 | 27 | 28 | class UnicodeBOMIE(InfoExtractor): 29 | IE_DESC = False 30 | _VALID_URL = r'(?P\ufeff)(?P.*)$' 31 | 32 | _TESTS = [{ 33 | 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 34 | 'only_matching': True, 35 | }] 36 | 37 | def _real_extract(self, url): 38 | real_url = self._match_id(url) 39 | self.report_warning( 40 | 'Your URL starts with a Byte Order Mark (BOM). ' 41 | 'Removing the BOM and looking for "%s" ...' % real_url) 42 | return self.url_result(real_url) 43 | -------------------------------------------------------------------------------- /yt_dlp/extractor/moviezine.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class MoviezineIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P[^?#]+)' 6 | 7 | _TEST = { 8 | 'url': 'http://www.moviezine.se/video/205866', 9 | 'info_dict': { 10 | 'id': '205866', 11 | 'ext': 'mp4', 12 | 'title': 'Oculus - Trailer 1', 13 | 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4', 14 | 'thumbnail': r're:http://.*\.jpg', 15 | }, 16 | } 17 | 18 | def _real_extract(self, url): 19 | mobj = self._match_valid_url(url) 20 | video_id = mobj.group('id') 21 | 22 | webpage = self._download_webpage(url, video_id) 23 | jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') 24 | 25 | formats = [{ 26 | 'format_id': 'sd', 27 | 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'), 28 | 'quality': 0, 29 | 'ext': 'mp4', 30 | }] 31 | 32 | return { 33 | 'id': video_id, 34 | 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'), 35 | 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'), 36 | 'formats': 
class StretchInternetIE(InfoExtractor):
    """Extract event recordings from the Stretch Internet portal."""

    # The capture group must be named ``id``: ``_match_id`` reads that group.
    _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
    _TEST = {
        'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
        'info_dict': {
            'id': '573272',
            'ext': 'mp4',
            'title': 'UNIVERSITY OF MARY WRESTLING VS UPPER IOWA',
            # 'timestamp': 1575668361,
            # 'upload_date': '20191206',
            'uploader_id': '99997',
        }
    }

    def _real_extract(self, url):
        """Combine the media URL from one JSON endpoint with event metadata from another."""
        video_id = self._match_id(url)

        # First media entry of the first event record; the value is a
        # scheme-less URL (``https://`` is prepended below).
        media_url = self._download_json(
            'https://core.stretchlive.com/trinity/event/tcg/' + video_id,
            video_id)[0]['media'][0]['url']
        event = self._download_json(
            'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
            video_id, query={'eventID': video_id, 'token': 'asdf'})['event']

        return {
            'id': video_id,
            'title': event['title'],
            # TODO: parse US timezone abbreviations
            # 'timestamp': event.get('dateTimeString'),
            'url': 'https://' + media_url,
            'uploader_id': event.get('ownerID'),
        }
ubuntu-latest 19 | outputs: 20 | version: ${{ steps.get_version.outputs.version }} 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Get version 25 | id: get_version 26 | run: | 27 | python devscripts/update-version.py "$(date -u +"%H%M%S")" | grep -Po "version=\d+(\.\d+){3}" >> "$GITHUB_OUTPUT" 28 | 29 | build: 30 | needs: prepare 31 | uses: ./.github/workflows/build.yml 32 | with: 33 | version: ${{ needs.prepare.outputs.version }} 34 | channel: nightly 35 | permissions: 36 | contents: read 37 | packages: write # For package cache 38 | secrets: 39 | GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} 40 | 41 | publish: 42 | needs: [prepare, build] 43 | uses: ./.github/workflows/publish.yml 44 | secrets: 45 | ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} 46 | permissions: 47 | contents: write 48 | with: 49 | nightly: true 50 | version: ${{ needs.prepare.outputs.version }} 51 | target_commitish: ${{ github.sha }} 52 | -------------------------------------------------------------------------------- /yt_dlp/extractor/bild.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import ( 3 | int_or_none, 4 | unescapeHTML, 5 | ) 6 | 7 | 8 | class BildIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P[^/]+)-(?P\d+)(?:,auto=true)?\.bild\.html' 10 | IE_DESC = 'Bild.de' 11 | _TEST = { 12 | 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html', 13 | 'md5': 'dd495cbd99f2413502a1713a1156ac8a', 14 | 'info_dict': { 15 | 'id': '38184146', 16 | 'ext': 'mp4', 17 | 'title': 'Das können die neuen iPads', 18 | 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 19 | 'thumbnail': r're:^https?://.*\.jpg$', 20 | 'duration': 196, 21 | } 22 | } 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | 27 | video_data = self._download_json( 28 | url.split('.bild.html')[0] + 
',view=json.bild.html', video_id) 29 | 30 | return { 31 | 'id': video_id, 32 | 'title': unescapeHTML(video_data['title']).strip(), 33 | 'description': unescapeHTML(video_data.get('description')), 34 | 'url': video_data['clipList'][0]['srces'][0]['src'], 35 | 'thumbnail': video_data.get('poster'), 36 | 'duration': int_or_none(video_data.get('durationSec')), 37 | } 38 | -------------------------------------------------------------------------------- /yt_dlp/extractor/worldstarhiphop.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class WorldStarHipHopIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P[^&]+)' 6 | _TESTS = [{ 7 | 'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO', 8 | 'md5': '9d04de741161603bf7071bbf4e883186', 9 | 'info_dict': { 10 | 'id': 'wshh6a7q1ny0G34ZwuIO', 11 | 'ext': 'mp4', 12 | 'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!' 13 | } 14 | }, { 15 | 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO', 16 | 'only_matching': True, 17 | }] 18 | 19 | def _real_extract(self, url): 20 | video_id = self._match_id(url) 21 | webpage = self._download_webpage(url, video_id) 22 | 23 | entries = self._parse_html5_media_entries(url, webpage, video_id) 24 | 25 | if not entries: 26 | return self.url_result(url, 'Generic') 27 | 28 | title = self._html_search_regex( 29 | [r'(?s)
\s*

(.*?)

class BreitBartIE(InfoExtractor):
    """Extract Breitbart videos, which are served from the JW Player CDN."""

    # Compared with the previous pattern:
    #   * the dot in ``breitbart.com`` is escaped (it used to match any character);
    #   * ``www.`` is optional, as in the other extractors;
    #   * the id stops at ``?``/``#`` so a URL without a trailing slash does not
    #     drag its query string into the id;
    #   * the capture group is named ``id``, which ``_match_id`` requires.
    _VALID_URL = r'https?://(?:www\.)?breitbart\.com/videos/v/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji',
        'md5': '0aa6d1d6e183ac5ca09207fe49f17ade',
        'info_dict': {
            'id': '5cOz1yup',
            'ext': 'mp4',
            'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery',
            'description': 'md5:bac35eb0256d1cb17f517f54c79404d5',
            'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg',
            'age_limit': 0,
        }
    }, {
        'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build formats from the JW Player manifest named after the video id."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4')
        return {
            'id': video_id,
            'title': self._generic_title('', webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._rta_search(webpage),
            'formats': formats
        }
'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html', 8 | 'md5': '2c62dd4db2027e35579fefb97a8b6554', 9 | 'info_dict': { 10 | 'id': '4732393888001', 11 | 'ext': 'mp4', 12 | 'title': 'Mankind: Why this woman started a men\'s skin care line', 13 | 'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.', 14 | 'uploader_id': '794267642001', 15 | 'timestamp': 1454353482, 16 | 'upload_date': '20160201', 17 | }, 18 | 'params': { 19 | # m3u8 download 20 | 'skip_download': True, 21 | } 22 | } 23 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s' 24 | 25 | def _real_extract(self, url): 26 | display_id = self._match_id(url) 27 | webpage = self._download_webpage(url, display_id) 28 | brightcove_id = self._search_regex( 29 | r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', 30 | webpage, 'brightcove id') 31 | return self.url_result( 32 | self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 33 | 'BrightcoveNew', brightcove_id) 34 | -------------------------------------------------------------------------------- /yt_dlp/extractor/bundesliga.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from .jwplatform import JWPlatformIE 3 | 4 | 5 | class BundesligaIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?bundesliga\.com/[a-z]{2}/bundesliga/videos(?:/[^?]+)?\?vid=(?P[a-zA-Z0-9]{8})' 7 | _TESTS = [ 8 | { 9 | 'url': 'https://www.bundesliga.com/en/bundesliga/videos?vid=bhhHkKyN', 10 | 'md5': '8fc3b25cd12440e3a8cdc51f1493849c', 11 | 'info_dict': { 12 | 'id': 'bhhHkKyN', 13 | 'ext': 'mp4', 14 | 'title': 'Watch: Alphonso Davies and Jeremie Frimpong head-to-head', 15 | 'thumbnail': 'https://cdn.jwplayer.com/v2/media/bhhHkKyN/poster.jpg?width=720', 16 | 'upload_date': '20220928', 17 | 'duration': 146, 18 | 'timestamp': 1664366511, 19 | 'description': 
'md5:803d4411bd134140c774021dd4b7598b' 20 | } 21 | }, 22 | { 23 | 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G', 24 | 'only_matching': True 25 | }, 26 | { 27 | 'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA', 28 | 'only_matching': True 29 | } 30 | ] 31 | 32 | def _real_extract(self, url): 33 | video_id = self._match_id(url) 34 | return self.url_result(f'jwplatform:{video_id}', JWPlatformIE, video_id) 35 | -------------------------------------------------------------------------------- /yt_dlp/postprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F401 2 | 3 | from .common import PostProcessor 4 | from .embedthumbnail import EmbedThumbnailPP 5 | from .exec import ExecAfterDownloadPP, ExecPP 6 | from .ffmpeg import ( 7 | FFmpegConcatPP, 8 | FFmpegCopyStreamPP, 9 | FFmpegEmbedSubtitlePP, 10 | FFmpegExtractAudioPP, 11 | FFmpegFixupDuplicateMoovPP, 12 | FFmpegFixupDurationPP, 13 | FFmpegFixupM3u8PP, 14 | FFmpegFixupM4aPP, 15 | FFmpegFixupStretchedPP, 16 | FFmpegFixupTimestampPP, 17 | FFmpegMergerPP, 18 | FFmpegMetadataPP, 19 | FFmpegPostProcessor, 20 | FFmpegSplitChaptersPP, 21 | FFmpegSubtitlesConvertorPP, 22 | FFmpegThumbnailsConvertorPP, 23 | FFmpegVideoConvertorPP, 24 | FFmpegVideoRemuxerPP, 25 | ) 26 | from .metadataparser import ( 27 | MetadataFromFieldPP, 28 | MetadataFromTitlePP, 29 | MetadataParserPP, 30 | ) 31 | from .modify_chapters import ModifyChaptersPP 32 | from .movefilesafterdownload import MoveFilesAfterDownloadPP 33 | from .sponskrub import SponSkrubPP 34 | from .sponsorblock import SponsorBlockPP 35 | from .xattrpp import XAttrMetadataPP 36 | from ..plugins import load_plugins 37 | 38 | _PLUGIN_CLASSES = load_plugins('postprocessor', 'PP') 39 | 40 | 41 | def get_postprocessor(key): 42 | return globals()[key + 'PP'] 43 | 44 | 45 | globals().update(_PLUGIN_CLASSES) 46 | __all__ = [name for name in 
class HowcastIE(InfoExtractor):
    """Resolve Howcast video pages to the embedded Ooyala player."""

    # The capture group must be named ``id``: ``_match_id`` reads that group.
    _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
        'md5': '7d45932269a288149483144f01b99789',
        'info_dict': {
            'id': '390161',
            'ext': 'mp4',
            'title': 'How to Tie a Square Knot Properly',
            'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
            'timestamp': 1276081287,
            'upload_date': '20100609',
            'duration': 56.823,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` Ooyala result plus the page's publish time."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The embed code is the value of the ``embed_code`` query parameter in
        # the player iframe's ``src``.  Inside a character class ``\b`` means
        # "backspace", so the former ``[^\b]+`` matched almost anything and ran
        # past the parameter; stop at the end of the parameter/attribute instead.
        # NOTE(review): the ``<iframe[^>`` prefix is reconstructed from a
        # truncated pattern - confirm against the page markup.
        embed_code = self._search_regex(
            r'<iframe[^>]+src="[^"]+\bembed_code=([^"&]+)',
            webpage, 'ooyala embed code')

        return {
            '_type': 'url_transparent',
            'ie_key': 'Ooyala',
            'url': 'ooyala:%s' % embed_code,
            'id': video_id,
            'timestamp': parse_iso8601(self._html_search_meta(
                'article:published_time', webpage, 'timestamp')),
        }
def read_file(fname):
    """Return the full contents of *fname*, decoded as UTF-8."""
    with open(fname, encoding='utf-8') as f:
        return f.read()


def write_file(fname, content, mode='w'):
    """Write *content* to *fname* as UTF-8 and return the number of characters written.

    *mode* is passed to ``open`` (e.g. ``'a'`` to append).
    """
    with open(fname, mode, encoding='utf-8') as f:
        return f.write(content)


def read_version(fname='yt_dlp/version.py'):
    """Get the version without importing the package"""
    # Execute the file in an explicit namespace.  Relying on ``exec`` writing
    # into this function's locals and reading them back through ``locals()``
    # is not guaranteed and no longer works on Python 3.13+ (PEP 667).
    namespace = {}
    exec(compile(read_file(fname), fname, 'exec'), namespace)
    return namespace['__version__']


def get_filename_args(has_infile=False, default_outfile=None):
    """Parse the command line for an output file and, optionally, an input file.

    Returns ``(infile, outfile)`` when *has_infile* is true, otherwise just
    ``outfile``.  The output file is optional on the command line only when
    *default_outfile* is given.
    """
    parser = argparse.ArgumentParser()
    if has_infile:
        parser.add_argument('infile', help='Input file')
    kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
    parser.add_argument('outfile', **kwargs, help='Output file')

    opts = parser.parse_args()
    if has_infile:
        return opts.infile, opts.outfile
    return opts.outfile


def compose_functions(*functions):
    """Return a function applying *functions* left to right: ``f_n(...f_1(x))``."""
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)


def run_process(*args, **kwargs):
    """Run the command *args* with ``subprocess.run`` and return the completed process.

    Defaults (each overridable through *kwargs*): text mode, ``check=True``
    and captured output; in text mode the streams are decoded as UTF-8 with
    undecodable bytes replaced.
    """
    kwargs.setdefault('text', True)
    kwargs.setdefault('check', True)
    kwargs.setdefault('capture_output', True)
    if kwargs['text']:
        kwargs.setdefault('encoding', 'utf-8')
        kwargs.setdefault('errors', 'replace')
    return subprocess.run(args, **kwargs)
entries.append(self.url_result( 38 | 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) 39 | 40 | return self.playlist_result(entries, page_id) 41 | -------------------------------------------------------------------------------- /yt_dlp/downloader/fc2.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | from .common import FileDownloader 4 | from .external import FFmpegFD 5 | 6 | 7 | class FC2LiveFD(FileDownloader): 8 | """ 9 | Downloads FC2 live without being stopped.
10 | Note, this is not a part of public API, and will be removed without notice. 11 | DO NOT USE 12 | """ 13 | 14 | def real_download(self, filename, info_dict): 15 | ws = info_dict['ws'] 16 | 17 | heartbeat_lock = threading.Lock() 18 | heartbeat_state = [None, 1] 19 | 20 | def heartbeat(): 21 | if heartbeat_state[1] < 0: 22 | return 23 | 24 | try: 25 | heartbeat_state[1] += 1 26 | ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) 27 | except Exception: 28 | self.to_screen('[fc2:live] Heartbeat failed') 29 | 30 | with heartbeat_lock: 31 | heartbeat_state[0] = threading.Timer(30, heartbeat) 32 | heartbeat_state[0]._daemonic = True 33 | heartbeat_state[0].start() 34 | 35 | heartbeat() 36 | 37 | new_info_dict = info_dict.copy() 38 | new_info_dict.update({ 39 | 'ws': None, 40 | 'protocol': 'live_ffmpeg', 41 | }) 42 | try: 43 | return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) 44 | finally: 45 | # stop heartbeating 46 | heartbeat_state[1] = -1 47 | -------------------------------------------------------------------------------- /yt_dlp/extractor/miaopai.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class MiaoPaiIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+)' 6 | _TEST = { 7 | 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', 8 | 'md5': '095ed3f1cd96b821add957bdc29f845b', 9 | 'info_dict': { 10 | 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', 11 | 'ext': 'mp4', 12 | 'title': '西游记音乐会的秒拍视频', 13 | 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', 14 | } 15 | } 16 | 17 | _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1' 18 | 19 | def _real_extract(self, url): 20 | video_id = self._match_id(url) 21 | webpage = self._download_webpage( 22 | url, video_id, headers={'User-Agent': 
self._USER_AGENT_IPAD}) 23 | 24 | title = self._html_extract_title(webpage) 25 | thumbnail = self._html_search_regex( 26 | r']+class=(?P[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P[\'"])(?P[^\'"]+)(?P=q2)', 27 | webpage, 'thumbnail', fatal=False, group='url') 28 | videos = self._parse_html5_media_entries(url, webpage, video_id) 29 | info = videos[0] 30 | 31 | info.update({ 32 | 'id': video_id, 33 | 'title': title, 34 | 'thumbnail': thumbnail, 35 | }) 36 | return info 37 | -------------------------------------------------------------------------------- /test/testdata/xspf/foo_xspf.xspf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2018-03-09T18:01:43Z 4 | 5 | 6 | cd1/track%201.mp3 7 | Pandemonium 8 | Foilverb 9 | Visit http://bigbrother404.bandcamp.com 10 | Pandemonium EP 11 | 1 12 | 202416 13 | 14 | 15 | ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 16 | Final Cartridge (Nichico Twelve Remix) 17 | Visit http://bigbrother404.bandcamp.com 18 | Foilverb 19 | Pandemonium EP 20 | 2 21 | 255857 22 | 23 | 24 | track3.mp3 25 | https://example.com/track3.mp3 26 | Rebuilding Nightingale 27 | Visit http://bigbrother404.bandcamp.com 28 | Foilverb 29 | Pandemonium EP 30 | 3 31 | 287915 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /yt_dlp/extractor/academicearth.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class AcademicEarthCourseIE(InfoExtractor): 7 | _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' 8 | IE_NAME = 'AcademicEarth:Course' 9 | _TEST = { 10 | 'url': 'http://academicearth.org/playlists/laws-of-nature/', 11 | 'info_dict': { 12 | 'id': 'laws-of-nature', 13 | 'title': 'Laws of Nature', 14 | 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and 
MIT.', 15 | }, 16 | 'playlist_count': 3, 17 | } 18 | 19 | def _real_extract(self, url): 20 | playlist_id = self._match_id(url) 21 | 22 | webpage = self._download_webpage(url, playlist_id) 23 | title = self._html_search_regex( 24 | r'

]*?>(.*?)

', webpage, 'title') 25 | description = self._html_search_regex( 26 | r'

]*?>(.*?)

', 27 | webpage, 'description', fatal=False) 28 | urls = re.findall( 29 | r'
  • \s*?', 30 | webpage) 31 | entries = [self.url_result(u) for u in urls] 32 | 33 | return { 34 | '_type': 'playlist', 35 | 'id': playlist_id, 36 | 'title': title, 37 | 'description': description, 38 | 'entries': entries, 39 | } 40 | -------------------------------------------------------------------------------- /yt_dlp/extractor/cozytv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import unified_strdate 3 | 4 | 5 | class CozyTVIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P[^/]+)/replays/(?P[^/$#&?]+)' 7 | 8 | _TESTS = [{ 9 | 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', 10 | 'info_dict': { 11 | 'id': 'beardson-2021-11-19_1', 12 | 'ext': 'mp4', 13 | 'title': 'pokemon pt2', 14 | 'uploader': 'beardson', 15 | 'upload_date': '20211119', 16 | 'was_live': True, 17 | 'duration': 7981, 18 | }, 19 | 'params': {'skip_download': True} 20 | }] 21 | 22 | def _real_extract(self, url): 23 | uploader, date = self._match_valid_url(url).groups() 24 | id = f'{uploader}-{date}' 25 | data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id) 26 | formats, subtitles = self._extract_m3u8_formats_and_subtitles( 27 | f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4') 28 | return { 29 | 'id': id, 30 | 'title': data_json.get('title'), 31 | 'uploader': data_json.get('user') or uploader, 32 | 'upload_date': unified_strdate(data_json.get('date')), 33 | 'was_live': True, 34 | 'duration': data_json.get('duration'), 35 | 'formats': formats, 36 | 'subtitles': subtitles, 37 | } 38 | -------------------------------------------------------------------------------- /yt_dlp/extractor/hgtv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class HGTVComShowIE(InfoExtractor): 5 | IE_NAME = 'hgtv.com:show' 6 | _VALID_URL = 
r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P[^/?#&]+)' 7 | _TESTS = [{ 8 | # data-module="video" 9 | 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos', 10 | 'info_dict': { 11 | 'id': 'flip-or-flop-full-episodes-season-4-videos', 12 | 'title': 'Flip or Flop Full Episodes', 13 | }, 14 | 'playlist_mincount': 15, 15 | }, { 16 | # data-deferred-module="video" 17 | 'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift', 18 | 'only_matching': True, 19 | }] 20 | 21 | def _real_extract(self, url): 22 | display_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage(url, display_id) 25 | 26 | config = self._parse_json( 27 | self._search_regex( 28 | r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?]+type=["\']text/x-config["\'][^>]*>(.+?)[A-Za-z0-9\-=_+]+)' 7 | _TEST = { 8 | 'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==', 9 | 'md5': '4466372687352851af2d131cfaa8a4c7', 10 | 'info_dict': { 11 | 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 12 | 'ext': 'mp4', 13 | 'title': "Damon's Glide message", 14 | 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$', 15 | } 16 | } 17 | 18 | def _real_extract(self, url): 19 | video_id = self._match_id(url) 20 | 21 | webpage = self._download_webpage(url, video_id) 22 | 23 | title = self._generic_title('', webpage) 24 | video_url = self._proto_relative_url(self._search_regex( 25 | r']+src=(["\'])(?P.+?)\1', 26 | webpage, 'video URL', default=None, 27 | group='url')) or self._og_search_video_url(webpage) 28 | thumbnail = self._proto_relative_url(self._search_regex( 29 | r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', 30 | webpage, 'thumbnail url', default=None, 31 | group='url')) or self._og_search_thumbnail(webpage) 32 | 33 | return { 34 | 'id': video_id, 35 | 'title': title, 36 | 'url': video_url, 37 | 'thumbnail': thumbnail, 38 | } 39 | -------------------------------------------------------------------------------- 
# /yt_dlp/extractor/yourupload.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor
from ..utils import urljoin


class YourUploadIE(InfoExtractor):
    """Extract a single video from yourupload.com watch/embed URLs."""

    # The `id` group is the value `_match_id(url)` is expected to return;
    # the first test maps .../watch/14i14h to the id '14i14h'.
    _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'http://yourupload.com/watch/14i14h',
        'md5': '5e2c63385454c557f97c4c4131a393cd',
        'info_dict': {
            'id': '14i14h',
            'ext': 'mp4',
            'title': 'BigBuckBunny_320x180.mp4',
            'thumbnail': r're:^https?://.*\.jpe?g',
        }
    }, {
        'url': 'http://www.yourupload.com/embed/14i14h',
        'only_matching': True,
    }, {
        'url': 'http://embed.yourupload.com/14i14h',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for *url*, built from the video's embed page.

        The embed page is fetched regardless of which URL form was given;
        title, video URL and thumbnail come from the `_og_search_*` helpers.
        """
        video_id = self._match_id(url)

        embed_url = 'http://www.yourupload.com/embed/%s' % video_id

        webpage = self._download_webpage(embed_url, video_id)

        title = self._og_search_title(webpage)
        # Resolve the advertised video URL against the embed page in case it
        # is not absolute.
        video_url = urljoin(embed_url, self._og_search_video_url(webpage))
        # The thumbnail is optional: default=None instead of failing.
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'http_headers': {
                # The embed page is passed along as Referer for the download.
                'Referer': embed_url,
            },
        }

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/skylinewebcams.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor


class SkylineWebcamsIE(InfoExtractor):
    """Extract the live stream of a skylinewebcams.com webcam page."""

    # The `id` group captures the page name without its `.html` suffix.
    _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
        'info_dict': {
            'id': 'scalinata-piazza-di-spagna-barcaccia',
            'ext': 'mp4',
            'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the webcam at *url*.

        The stream URL is the first quoted `.m3u8` address that follows a
        `url:` or `source:` key in the page; the entry is always marked live.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Group 1 is the opening quote (re-matched by `\1`); the address
        # itself is the named group `url`, which is what `group='url'` reads.
        stream_url = self._search_regex(
            r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
            'stream url', group='url')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        return {
            'id': video_id,
            'url': stream_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'is_live': True,
        }

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/fox9.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor


class FOX9IE(InfoExtractor):
    """Hand fox9.com/video/<id> URLs over to the Anvato extractor."""

    _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a url_result pointing the 'Anvato' extractor at this video."""
        video_id = self._match_id(url)
        # Fixed `anvato:<key>:` prefix followed by the numeric id from the URL.
        return self.url_result(
            'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
            'Anvato', video_id)


class FOX9NewsIE(InfoExtractor):
    """Resolve fox9.com/news/<slug> articles to their /video/<id> URL."""

    # Here the `id` group is the article slug, used only as a display id.
    _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
        'md5': 'd6e1b2572c3bab8a849c9103615dd243',
        'info_dict': {
            'id': '314473',
            'ext': 'mp4',
            'title': 'Bear climbs tree in downtown Duluth',
            'description': 'md5:6a36bfb5073a411758a752455408ac90',
            'duration': 51,
            'timestamp': 1478123580,
            'upload_date': '20161102',
            'uploader': 'EPFOX',
            'categories': ['News', 'Sports'],
            'tags': ['news', 'video'],
        },
    }

    def _real_extract(self, url):
        """Find the page's `anvatoId` and delegate to the 'FOX9' extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        anvato_id = self._search_regex(
            r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
        return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/trunews.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor


class TruNewsIE(InfoExtractor):
    """Resolve trunews.com/stream/<slug> pages to their Zype embed."""

    # The `id` group is the URL slug; it is sent to the API as `friendly_title`.
    _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
        'info_dict': {
            'id': '5c5a21e65d3c196e1c0020cc',
            'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
            'ext': 'mp4',
            'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
            'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
            'duration': 3685,
            'timestamp': 1549411440,
            'upload_date': '20190206',
        },
        'add_ie': ['Zype'],
    }
    # Player URL template; `%s` receives the Zype video id.
    _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'

    def _real_extract(self, url):
        """Look up the Zype id for the slug in *url* and delegate to 'Zype'.

        Indexing `['response'][0]['_id']` raises if the API returns no match.
        """
        display_id = self._match_id(url)

        zype_id = self._download_json(
            'https://api.zype.com/videos', display_id, query={
                'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
                'per_page': 1,
                'active': 'true',
                'friendly_title': display_id,
            })['response'][0]['_id']
        return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)

# --------------------------------------------------------------------------------
# /yt_dlp/dependencies/Cryptodome.py:
# --------------------------------------------------------------------------------
# Re-exports the cipher/hash/key primitives from whichever of `Cryptodome` or
# `Crypto` can be imported, and records which one was found.
from ..compat.compat_utils import passthrough_module

try:
    import Cryptodome as _parent
except ImportError:
    try:
        import Crypto as _parent
    except (ImportError, SyntaxError):  # Old Crypto gives SyntaxError in newer Python
        # Neither package is usable: install a placeholder parent and make
        # this module falsy.
        _parent = passthrough_module(__name__, 'no_Cryptodome')
        __bool__ = lambda: False

del passthrough_module

# Defaults used when no backend (or only a broken one) is available.
__version__ = ''
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
try:
    if _parent.__name__ == 'Cryptodome':
        from Cryptodome import __version__
        from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
        from Cryptodome.Hash import CMAC, SHA1
        from Cryptodome.PublicKey import RSA
    elif _parent.__name__ == 'Crypto':
        from Crypto import __version__
        from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5  # noqa: F401
        from Crypto.Hash import CMAC, SHA1  # noqa: F401
        from Crypto.PublicKey import RSA  # noqa: F401
except ImportError:
    # The parent imported but a submodule did not: flag the install as broken.
    __version__ = f'broken {__version__}'.strip()


_yt_dlp__identifier = _parent.__name__
if AES and _yt_dlp__identifier == 'Crypto':
    try:
        # In pycrypto, mode defaults to ECB. See:
        # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
        AES.new(b'abcdefghijklmnop')
    except TypeError:
        # A mode argument is mandatory, so this is not pycrypto: keep 'Crypto'.
        pass
    else:
        # The call succeeded without a mode, which per the note above is the
        # pycrypto behaviour.
        _yt_dlp__identifier = 'pycrypto'

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/theholetv.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor
from ..utils import extract_attributes, remove_end


class TheHoleTvIE(InfoExtractor):
    """Extract an episode from the-hole.tv/episodes/<slug>."""

    _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
        'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
        'info_dict': {
            'id': 'gromkii-vopros-sergey-orlov',
            'ext': 'mp4',
            'title': 'Сергей Орлов — Громкий вопрос',
            'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
            'description': 'md5:45741a9202331f995d9fb76996759379'
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        The m3u8 source and the poster are read from the attributes of the
        element carrying `data-controller="player"`.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # NOTE(review): the opening `<div` was lost from this pattern in
        # extraction; `div` is presumed - confirm against the site markup.
        player_attrs = extract_attributes(self._search_regex(
            r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            player_attrs['data-player-source-value'], video_id, 'mp4')

        return {
            'id': video_id,
            # Drop the site-name suffix from the page title.
            'title': remove_end(self._html_extract_title(webpage), ' — The Hole'),
            'description': self._og_search_description(webpage),
            'thumbnail': player_attrs.get('data-player-poster-value'),
            'formats': formats,
            'subtitles': subtitles
        }

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/filmweb.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor


class FilmwebIE(InfoExtractor):
    """Resolve filmweb.no trailer/news articles to their embedded player."""

    # Both groups are consumed positionally via `.groups()` below, in the
    # order (article type, article id).
    _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
    _TEST = {
        'url': 'http://www.filmweb.no/trailere/article1264921.ece',
        'md5': 'e353f47df98e557d67edaceda9dece89',
        'info_dict': {
            'id': '13033574',
            'ext': 'mp4',
            'title': 'Det som en gang var',
            'upload_date': '20160316',
            'timestamp': 1458140101,
            'uploader_id': '12639966',
            'uploader': 'Live Roaldset',
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the iframe in the embed code.

        For `filmnytt` articles the id sent to the embed endpoint is taken
        from the page's `data-videoid` attribute instead of the URL.
        """
        article_type, article_id = self._match_valid_url(url).groups()
        if article_type == 'filmnytt':
            webpage = self._download_webpage(url, article_id)
            article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
        embed_code = self._download_json(
            'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
            article_id, query={
                'articleId': article_id,
            })['embedCode']
        # NOTE(review): the opening `<iframe` was lost in extraction and is
        # restored from the 'iframe url' label - confirm.
        iframe_url = self._proto_relative_url(self._search_regex(
            r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url'))

        return {
            '_type': 'url_transparent',
            'id': article_id,
            'url': iframe_url,
            'ie_key': 'TwentyThreeVideo',
        }

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/xbef.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote


class XBefIE(InfoExtractor):
    """Extract a video from xbef.com/video/<id>."""

    _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
        'md5': 'a478b565baff61634a98f5e5338be995',
        'info_dict': {
            'id': '5119',
            'ext': 'mp4',
            'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
            'age_limit': 18,
            'thumbnail': r're:^http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for *url*.

        The media and thumbnail URLs come from an XML config whose
        percent-encoded address is served by `GetVideoURLEncoded/<id>`.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # NOTE(review): the tag names were lost from this pattern in
        # extraction; `<h1>...</h1>` is presumed - confirm.
        title = self._html_search_regex(
            r'<h1[^>]*>(.*?)</h1>', webpage, 'title')

        config_url_enc = self._download_webpage(
            'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
            note='Retrieving config URL')
        config_url = compat_urllib_parse_unquote(config_url_enc)
        config = self._download_xml(
            config_url, video_id, note='Retrieving config')

        video_url = config.find('./file').text
        thumbnail = config.find('./image').text

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'age_limit': 18,
        }

# --------------------------------------------------------------------------------
# /devscripts/zsh-completion.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import yt_dlp

# Both paths are relative, so the script must run from the repository root.
ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"


def build_completion(opt_parser):
    """Render the zsh completion script from its template.

    Fills the `{{fileopts}}`, `{{diropts}}` and `{{flags}}` placeholders of
    ZSH_COMPLETION_TEMPLATE and writes the result to ZSH_COMPLETION_FILE.

    opt_parser: parser whose `option_groups` provide the options to list.
    """
    opts = [opt for group in opt_parser.option_groups
            for opt in group.option_list]
    # Options whose argument is a file or a directory get path completion.
    opts_file = [opt for opt in opts if opt.metavar == "FILE"]
    opts_dir = [opt for opt in opts if opt.metavar == "DIR"]

    fileopts = []
    for opt in opts_file:
        if opt._short_opts:
            fileopts.extend(opt._short_opts)
        if opt._long_opts:
            fileopts.extend(opt._long_opts)

    diropts = []
    for opt in opts_dir:
        if opt._short_opts:
            diropts.extend(opt._short_opts)
        if opt._long_opts:
            diropts.extend(opt._long_opts)

    # One representative option string per option.
    flags = [opt.get_opt_string() for opt in opts]

    with open(ZSH_COMPLETION_TEMPLATE) as f:
        template = f.read()

    # fileopts/diropts are joined with "|", flags with spaces.
    template = template.replace("{{fileopts}}", "|".join(fileopts))
    template = template.replace("{{diropts}}", "|".join(diropts))
    template = template.replace("{{flags}}", " ".join(flags))

    with open(ZSH_COMPLETION_FILE, "w") as f:
        f.write(template)


# `parseOpts` returns a sequence whose first item is the option parser.
parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)

# --------------------------------------------------------------------------------
# /yt_dlp/extractor/uktvplay.py:
# --------------------------------------------------------------------------------
from .common import InfoExtractor


class UKTVPlayIE(InfoExtractor):
    """Hand uktvplay.co.uk / uktvplay.uktv.co.uk videos to BrightcoveNew."""

    # The numeric id is taken either from a `video=` query parameter or from
    # the last matched path segment.
    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
        'info_dict': {
            'id': '2117008346001',
            'ext': 'mp4',
            'title': 'Pincers',
            'description': 'Pincers',
            'uploader_id': '1242911124001',
            'upload_date': '20130124',
            'timestamp': 1359049267,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download MPD manifest']
    }, {
        'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
        'only_matching': True,
    }, {
        'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
        'only_matching': True,
    }]
    # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a url_result for the Brightcove player URL of this video."""
        video_id = self._match_id(url)
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % video_id,
            'BrightcoveNew', video_id)
-------------------------------------------------------------------------------- /yt_dlp/extractor/ehow.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..compat import compat_urllib_parse_unquote 3 | 4 | 5 | class EHowIE(InfoExtractor): 6 | IE_NAME = 'eHow' 7 | _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P[0-9]+)' 8 | _TEST = { 9 | 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', 10 | 'md5': '9809b4e3f115ae2088440bcb4efbf371', 11 | 'info_dict': { 12 | 'id': '12245069', 13 | 'ext': 'flv', 14 | 'title': 'Hardwood Flooring Basics', 15 | 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...', 16 | 'uploader': 'Erick Nathan', 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | video_url = self._search_regex( 24 | r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') 25 | final_url = compat_urllib_parse_unquote(video_url) 26 | uploader = self._html_search_meta('uploader', webpage) 27 | title = self._og_search_title(webpage).replace(' | eHow', '') 28 | 29 | return { 30 | 'id': video_id, 31 | 'url': final_url, 32 | 'title': title, 33 | 'thumbnail': self._og_search_thumbnail(webpage), 34 | 'description': self._og_search_description(webpage), 35 | 'uploader': uploader, 36 | } 37 | -------------------------------------------------------------------------------- /yt_dlp/extractor/livejournal.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..compat import compat_str 3 | from ..utils import int_or_none 4 | 5 | 6 | class LiveJournalIE(InfoExtractor): 7 | _VALID_URL = 
r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' 8 | _TEST = { 9 | 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', 10 | 'md5': 'adaf018388572ced8a6f301ace49d4b2', 11 | 'info_dict': { 12 | 'id': '1263729', 13 | 'ext': 'mp4', 14 | 'title': 'Истребители против БПЛА', 15 | 'upload_date': '20190624', 16 | 'timestamp': 1561406715, 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | record = self._parse_json(self._search_regex( 24 | r'Site\.page\s*=\s*({.+?});', webpage, 25 | 'page data'), video_id)['video']['record'] 26 | storage_id = compat_str(record['storageid']) 27 | title = record.get('name') 28 | if title: 29 | # remove filename extension(.mp4, .mov, etc...) 30 | title = title.rsplit('.', 1)[0] 31 | return { 32 | '_type': 'url_transparent', 33 | 'id': video_id, 34 | 'title': title, 35 | 'thumbnail': record.get('thumbnail'), 36 | 'timestamp': int_or_none(record.get('timecreate')), 37 | 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id, 38 | 'ie_key': 'EaglePlatform', 39 | } 40 | -------------------------------------------------------------------------------- /yt_dlp/extractor/oktoberfesttv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class OktoberfestTVIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P[^/?#]+)' 6 | 7 | _TEST = { 8 | 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', 9 | 'info_dict': { 10 | 'id': 'hb-zelt', 11 | 'ext': 'mp4', 12 | 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 13 | 'thumbnail': r're:^https?://.*\.jpg$', 14 | 'is_live': True, 15 | }, 16 | 'params': { 17 | 'skip_download': True, 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = 
self._download_webpage(url, video_id) 24 | 25 | title = self._html_search_regex( 26 | r'

    .*?(.*?)

    ', webpage, 'title') 27 | 28 | clip = self._search_regex( 29 | r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') 30 | ncurl = self._search_regex( 31 | r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base') 32 | video_url = ncurl + clip 33 | thumbnail = self._search_regex( 34 | r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage, 35 | 'thumbnail', fatal=False) 36 | 37 | return { 38 | 'id': video_id, 39 | 'title': title, 40 | 'url': video_url, 41 | 'ext': 'mp4', 42 | 'is_live': True, 43 | 'thumbnail': thumbnail, 44 | } 45 | -------------------------------------------------------------------------------- /yt_dlp/extractor/odatv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import ( 3 | ExtractorError, 4 | NO_DEFAULT, 5 | remove_start 6 | ) 7 | 8 | 9 | class OdaTVIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P[^&]+)' 11 | _TESTS = [{ 12 | 'url': 'http://odatv.com/vid_video.php?id=8E388', 13 | 'md5': 'dc61d052f205c9bf2da3545691485154', 14 | 'info_dict': { 15 | 'id': '8E388', 16 | 'ext': 'mp4', 17 | 'title': 'Artık Davutoğlu ile devam edemeyiz' 18 | } 19 | }, { 20 | # mobile URL 21 | 'url': 'http://odatv.com/mob_video.php?id=8E388', 22 | 'only_matching': True, 23 | }, { 24 | # no video 25 | 'url': 'http://odatv.com/mob_video.php?id=8E900', 26 | 'only_matching': True, 27 | }] 28 | 29 | def _real_extract(self, url): 30 | video_id = self._match_id(url) 31 | webpage = self._download_webpage(url, video_id) 32 | 33 | no_video = 'NO VIDEO!' 
in webpage 34 | 35 | video_url = self._search_regex( 36 | r'mp4\s*:\s*(["\'])(?Phttp.+?)\1', webpage, 'video url', 37 | default=None if no_video else NO_DEFAULT, group='url') 38 | 39 | if no_video: 40 | raise ExtractorError('Video %s does not exist' % video_id, expected=True) 41 | 42 | return { 43 | 'id': video_id, 44 | 'url': video_url, 45 | 'title': remove_start(self._og_search_title(webpage), 'Video: '), 46 | 'thumbnail': self._og_search_thumbnail(webpage), 47 | } 48 | -------------------------------------------------------------------------------- /yt_dlp/extractor/masters.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from .common import InfoExtractor 3 | from ..utils import ( 4 | traverse_obj, 5 | unified_strdate, 6 | ) 7 | 8 | 9 | class MastersIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P\d{4}-\d{2}-\d{2})/(?P\d+)' 11 | _TESTS = [{ 12 | 'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html', 13 | 'info_dict': { 14 | 'id': '16493755593805191', 15 | 'ext': 'mp4', 16 | 'title': 'Sungjae Im: Thursday Interview 2022', 17 | 'upload_date': '20220407', 18 | 'thumbnail': r're:^https?://.*\.jpg$', 19 | } 20 | }] 21 | 22 | def _real_extract(self, url): 23 | video_id, upload_date = self._match_valid_url(url).group('id', 'date') 24 | content_resp = self._download_json( 25 | f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en', 26 | video_id) 27 | formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4') 28 | 29 | thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()] 30 | 31 | return { 32 | 'id': video_id, 33 | 'title': content_resp.get('title'), 34 | 'formats': formats, 35 | 'subtitles': subtitles, 36 | 
'upload_date': unified_strdate(upload_date), 37 | 'thumbnails': thumbnails, 38 | } 39 | -------------------------------------------------------------------------------- /yt_dlp/extractor/ruhd.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class RUHDIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' 6 | _TEST = { 7 | 'url': 'http://www.ruhd.ru/play.php?vid=207', 8 | 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', 9 | 'info_dict': { 10 | 'id': '207', 11 | 'ext': 'divx', 12 | 'title': 'КОТ бааааам', 13 | 'description': 'классный кот)', 14 | 'thumbnail': r're:^http://.*\.jpg$', 15 | } 16 | } 17 | 18 | def _real_extract(self, url): 19 | video_id = self._match_id(url) 20 | webpage = self._download_webpage(url, video_id) 21 | 22 | video_url = self._html_search_regex( 23 | r'([^<]+)   RUHD\.ru - Видео Высокого качества №1 в России!', 26 | webpage, 'title') 27 | description = self._html_search_regex( 28 | r'(?s)
    (.+?)', 29 | webpage, 'description', fatal=False) 30 | thumbnail = self._html_search_regex( 31 | r'video|production)_id=(?P[0-9]+)' 6 | _TEST = { 7 | 'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416', 8 | 'md5': 'b8993daad4262dd68d89d651c0c52c45', 9 | 'info_dict': { 10 | 'id': 'wUUDZZep6vQD', 11 | 'ext': 'mp4', 12 | 'title': 'Miss Holland joins VOTE LEAVE', 13 | 'description': 'Miss Holland | #13 Not a potato', 14 | 'uploader': 'Miss Holland', 15 | } 16 | } 17 | 18 | def _real_extract(self, url): 19 | id_type, url_id = self._match_valid_url(url).groups() 20 | webpage = self._download_webpage(url, url_id) 21 | video_data = self._html_search_regex(r']+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data') 22 | 23 | def extract_data_val(attr, fatal=False): 24 | return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal) 25 | minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id') 26 | 27 | return { 28 | '_type': 'url_transparent', 29 | 'url': 'minoto:%s' % minoto_id, 30 | 'id': url_id, 31 | 'title': extract_data_val('title', True), 32 | 'description': extract_data_val('description'), 33 | 'thumbnail': extract_data_val('image'), 34 | 'uploader': extract_data_val('channel'), 35 | } 36 | -------------------------------------------------------------------------------- /yt_dlp/extractor/thisamericanlife.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class ThisAmericanLifeIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P\d+)' 6 | _TESTS = [{ 7 | 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one', 8 | 'md5': '8f7d2da8926298fdfca2ee37764c11ce', 9 | 'info_dict': { 10 | 'id': '487', 11 | 'ext': 'm4a', 12 | 
'title': '487: Harper High School, Part One', 13 | 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655', 14 | 'thumbnail': r're:^https?://.*\.jpg$', 15 | }, 16 | }, { 17 | 'url': 'http://www.thisamericanlife.org/play_full.php?play=487', 18 | 'only_matching': True, 19 | }] 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage( 25 | 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id) 26 | 27 | return { 28 | 'id': video_id, 29 | 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id), 30 | 'protocol': 'm3u8_native', 31 | 'ext': 'm4a', 32 | 'acodec': 'aac', 33 | 'vcodec': 'none', 34 | 'abr': 64, 35 | 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True), 36 | 'description': self._html_search_meta(r'description', webpage, 'description'), 37 | 'thumbnail': self._og_search_thumbnail(webpage), 38 | } 39 | -------------------------------------------------------------------------------- /yt_dlp/extractor/tvland.py: -------------------------------------------------------------------------------- 1 | from .mtv import MTVServicesInfoExtractor 2 | 3 | # TODO: Remove - Reason not used anymore - Service moved to youtube 4 | 5 | 6 | class TVLandIE(MTVServicesInfoExtractor): 7 | IE_NAME = 'tvland.com' 8 | _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' 9 | _FEED_URL = 'http://www.tvland.com/feeds/mrss/' 10 | _TESTS = [{ 11 | # Geo-restricted. Without a proxy metadata are still there. 
With a 12 | # proxy it redirects to http://m.tvland.com/app/ 13 | 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19', 14 | 'info_dict': { 15 | 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a', 16 | 'title': 'The Dog', 17 | }, 18 | 'playlist_mincount': 5, 19 | 'skip': '404 Not found', 20 | }, { 21 | 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 22 | 'md5': 'e2c6389401cf485df26c79c247b08713', 23 | 'info_dict': { 24 | 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757', 25 | 'ext': 'mp4', 26 | 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6', 27 | 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2', 28 | 'upload_date': '20190430', 29 | 'timestamp': 1556658000, 30 | }, 31 | 'params': { 32 | 'skip_download': True, 33 | }, 34 | }, { 35 | 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', 36 | 'only_matching': True, 37 | }] 38 | -------------------------------------------------------------------------------- /yt_dlp/downloader/rtsp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from .common import FileDownloader 5 | from ..utils import check_executable, encodeFilename 6 | 7 | 8 | class RtspFD(FileDownloader): 9 | def real_download(self, filename, info_dict): 10 | url = info_dict['url'] 11 | self.report_destination(filename) 12 | tmpfilename = self.temp_name(filename) 13 | 14 | if check_executable('mplayer', ['-h']): 15 | args = [ 16 | 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', 17 | '-dumpstream', '-dumpfile', tmpfilename, url] 18 | elif check_executable('mpv', ['-h']): 19 | args = [ 20 | 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] 21 | else: 22 | self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. 
Please install one') 23 | return False 24 | 25 | self._debug_cmd(args) 26 | 27 | retval = subprocess.call(args) 28 | if retval == 0: 29 | fsize = os.path.getsize(encodeFilename(tmpfilename)) 30 | self.to_screen(f'\r[{args[0]}] {fsize} bytes') 31 | self.try_rename(tmpfilename, filename) 32 | self._hook_progress({ 33 | 'downloaded_bytes': fsize, 34 | 'total_bytes': fsize, 35 | 'filename': filename, 36 | 'status': 'finished', 37 | }, info_dict) 38 | return True 39 | else: 40 | self.to_stderr('\n') 41 | self.report_error('%s exited with code %d' % (args[0], retval)) 42 | return False 43 | -------------------------------------------------------------------------------- /yt_dlp/extractor/goshgay.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..compat import ( 3 | compat_parse_qs, 4 | ) 5 | from ..utils import ( 6 | parse_duration, 7 | ) 8 | 9 | 10 | class GoshgayIE(InfoExtractor): 11 | _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P\d+?)($|/)' 12 | _TEST = { 13 | 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 14 | 'md5': '4b6db9a0a333142eb9f15913142b0ed1', 15 | 'info_dict': { 16 | 'id': '299069', 17 | 'ext': 'flv', 18 | 'title': 'DIESEL SFW XXX Video', 19 | 'thumbnail': r're:^http://.*\.jpg$', 20 | 'duration': 80, 21 | 'age_limit': 18, 22 | } 23 | } 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | webpage = self._download_webpage(url, video_id) 28 | 29 | title = self._html_search_regex( 30 | r'

    (.*?)<', webpage, 'title') 31 | duration = parse_duration(self._html_search_regex( 32 | r'\s*-?\s*(.*?)', 33 | webpage, 'duration', fatal=False)) 34 | 35 | flashvars = compat_parse_qs(self._html_search_regex( 36 | r'[0-9a-z]{5})' 7 | _TEST = { 8 | 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 9 | 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', 10 | 'info_dict': { 11 | 'id': '1v6ga', 12 | 'ext': 'mp3', 13 | 'title': 'Tame', 14 | 'uploader': 'BODYWORK', 15 | 'timestamp': 1371810457, 16 | 'upload_date': '20130621', 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | track_id = self._match_id(url) 22 | 23 | response = self._download_webpage(url, track_id) 24 | 25 | track = self._parse_json(self._html_search_regex( 26 | r'(?s)(.+?)', 27 | response, 'tracks'), track_id)['tracks'][0] 28 | 29 | track_id = track['id'] 30 | title = track['song'] 31 | 32 | final_url = self._download_json( 33 | 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']), 34 | track_id, 'Downloading metadata', headers={ 35 | 'Content-Type': 'application/json' 36 | })['url'] 37 | 38 | return { 39 | 'id': track_id, 40 | 'url': final_url, 41 | 'ext': 'mp3', 42 | 'title': title, 43 | 'uploader': track.get('artist'), 44 | 'duration': int_or_none(track.get('time')), 45 | 'timestamp': int_or_none(track.get('ts')), 46 | 'track': title, 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/download.yml: -------------------------------------------------------------------------------- 1 | name: Download Tests 2 | on: [push, pull_request] 3 | permissions: 4 | contents: read 5 | 6 | jobs: 7 | quick: 8 | name: Quick Download Tests 9 | if: "contains(github.event.head_commit.message, 'ci run dl')" 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.9 17 | - name: Install test requirements 18 | run: pip install pytest 
19 | - name: Run tests 20 | continue-on-error: true 21 | run: ./devscripts/run_tests.sh download 22 | 23 | full: 24 | name: Full Download Tests 25 | if: "contains(github.event.head_commit.message, 'ci run dl all')" 26 | runs-on: ${{ matrix.os }} 27 | strategy: 28 | fail-fast: true 29 | matrix: 30 | os: [ubuntu-latest] 31 | python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] 32 | run-tests-ext: [sh] 33 | include: 34 | # atleast one of each CPython/PyPy tests must be in windows 35 | - os: windows-latest 36 | python-version: '3.8' 37 | run-tests-ext: bat 38 | - os: windows-latest 39 | python-version: pypy-3.9 40 | run-tests-ext: bat 41 | steps: 42 | - uses: actions/checkout@v3 43 | - name: Set up Python ${{ matrix.python-version }} 44 | uses: actions/setup-python@v4 45 | with: 46 | python-version: ${{ matrix.python-version }} 47 | - name: Install pytest 48 | run: pip install pytest 49 | - name: Run tests 50 | continue-on-error: true 51 | run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} download 52 | -------------------------------------------------------------------------------- /yt_dlp/extractor/caltrans.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class CaltransIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:[^/]+\.)?ca\.gov/vm/loc/[^/]+/(?P[a-z0-9_]+)\.htm' 6 | _TEST = { 7 | 'url': 'https://cwwp2.dot.ca.gov/vm/loc/d3/hwy50at24th.htm', 8 | 'info_dict': { 9 | 'id': 'hwy50at24th', 10 | 'ext': 'ts', 11 | 'title': 'US-50 : Sacramento : Hwy 50 at 24th', 12 | 'live_status': 'is_live', 13 | 'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg', 14 | } 15 | } 16 | 17 | def _real_extract(self, url): 18 | video_id = self._match_id(url) 19 | webpage = self._download_webpage(url, video_id) 20 | 21 | global_vars = self._search_regex( 22 | r'', 23 | webpage, 'Global Vars') 24 | route_place = self._search_regex(r'routePlace\s*=\s*"([^"]+)"', 
global_vars, 'Route Place', fatal=False) 25 | location_name = self._search_regex(r'locationName\s*=\s*"([^"]+)"', global_vars, 'Location Name', fatal=False) 26 | poster_url = self._search_regex(r'posterURL\s*=\s*"([^"]+)"', global_vars, 'Poster Url', fatal=False) 27 | video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False) 28 | 29 | formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True) 30 | 31 | return { 32 | 'id': video_id, 33 | 'title': f'{route_place} : {location_name}', 34 | 'is_live': True, 35 | 'formats': formats, 36 | 'thumbnail': poster_url, 37 | } 38 | -------------------------------------------------------------------------------- /test/test_age_restriction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Allow direct execution 4 | import os 5 | import sys 6 | import unittest 7 | 8 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | 11 | from test.helper import is_download_test, try_rm 12 | from yt_dlp import YoutubeDL 13 | from yt_dlp.utils import DownloadError 14 | 15 | 16 | def _download_restricted(url, filename, age): 17 | """ Returns true if the file has been downloaded """ 18 | 19 | params = { 20 | 'age_limit': age, 21 | 'skip_download': True, 22 | 'writeinfojson': True, 23 | 'outtmpl': '%(id)s.%(ext)s', 24 | } 25 | ydl = YoutubeDL(params) 26 | ydl.add_default_info_extractors() 27 | json_filename = os.path.splitext(filename)[0] + '.info.json' 28 | try_rm(json_filename) 29 | try: 30 | ydl.download([url]) 31 | except DownloadError: 32 | pass 33 | else: 34 | return os.path.exists(json_filename) 35 | finally: 36 | try_rm(json_filename) 37 | 38 | 39 | @is_download_test 40 | class TestAgeRestriction(unittest.TestCase): 41 | def _assert_restricted(self, url, filename, age, old_age=None): 42 | self.assertTrue(_download_restricted(url, filename, old_age)) 43 | 
self.assertFalse(_download_restricted(url, filename, age)) 44 | 45 | def test_youtube(self): 46 | self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10) 47 | 48 | def test_youporn(self): 49 | self._assert_restricted( 50 | 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/', 51 | '16715086.mp4', 2, old_age=25) 52 | 53 | 54 | if __name__ == '__main__': 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /yt_dlp/extractor/livestreamfails.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import format_field, traverse_obj, unified_timestamp 3 | 4 | 5 | class LivestreamfailsIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/(?:clip|post)/(?P[0-9]+)' 7 | _TESTS = [{ 8 | 'url': 'https://livestreamfails.com/clip/139200', 9 | 'md5': '8a03aea1a46e94a05af6410337463102', 10 | 'info_dict': { 11 | 'id': '139200', 12 | 'ext': 'mp4', 13 | 'display_id': 'ConcernedLitigiousSalmonPeteZaroll-O8yo9W2L8OZEKhV2', 14 | 'title': 'Streamer jumps off a trampoline at full speed', 15 | 'creator': 'paradeev1ch', 16 | 'thumbnail': r're:^https?://.+', 17 | 'timestamp': 1656271785, 18 | 'upload_date': '20220626', 19 | } 20 | }, { 21 | 'url': 'https://livestreamfails.com/post/139200', 22 | 'only_matching': True, 23 | }] 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | api_response = self._download_json(f'https://api.livestreamfails.com/clip/{video_id}', video_id) 28 | 29 | return { 30 | 'id': video_id, 31 | 'display_id': api_response.get('sourceId'), 32 | 'timestamp': unified_timestamp(api_response.get('createdAt')), 33 | 'url': f'https://livestreamfails-video-prod.b-cdn.net/video/{api_response["videoId"]}', 34 | 'title': api_response.get('label'), 35 | 'creator': traverse_obj(api_response, ('streamer', 'label')), 36 | 'thumbnail': format_field(api_response, 'imageId', 
'https://livestreamfails-image-prod.b-cdn.net/image/%s') 37 | } 38 | -------------------------------------------------------------------------------- /test/versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "latest": "2013.01.06", 3 | "signature": "72158cdba391628569ffdbea259afbcf279bbe3d8aeb7492690735dc1cfa6afa754f55c61196f3871d429599ab22f2667f1fec98865527b32632e7f4b3675a7ef0f0fbe084d359256ae4bba68f0d33854e531a70754712f244be71d4b92e664302aa99653ee4df19800d955b6c4149cd2b3f24288d6e4b40b16126e01f4c8ce6", 4 | "versions": { 5 | "2013.01.02": { 6 | "bin": [ 7 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl", 8 | "f5b502f8aaa77675c4884938b1e4871ebca2611813a0c0e74f60c0fbd6dcca6b" 9 | ], 10 | "exe": [ 11 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl.exe", 12 | "75fa89d2ce297d102ff27675aa9d92545bbc91013f52ec52868c069f4f9f0422" 13 | ], 14 | "tar": [ 15 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl-2013.01.02.tar.gz", 16 | "6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196" 17 | ] 18 | }, 19 | "2013.01.06": { 20 | "bin": [ 21 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl", 22 | "64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049" 23 | ], 24 | "exe": [ 25 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe", 26 | "58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84" 27 | ], 28 | "tar": [ 29 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz", 30 | "fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86" 31 | ] 32 | } 33 | } 34 | } -------------------------------------------------------------------------------- /yt_dlp/extractor/tvnoe.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import ( 3 | clean_html, 4 | get_element_by_class, 5 | js_to_json, 6 | ) 7 | 8 | 9 | class TVNoeIE(InfoExtractor): 10 | 
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P[0-9]+)' 11 | _TEST = { 12 | 'url': 'http://www.tvnoe.cz/video/10362', 13 | 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', 14 | 'info_dict': { 15 | 'id': '10362', 16 | 'ext': 'mp4', 17 | 'series': 'Noční univerzita', 18 | 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', 19 | 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | iframe_url = self._search_regex( 28 | r']+src="([^"]+)"', webpage, 'iframe URL') 29 | 30 | ifs_page = self._download_webpage(iframe_url, video_id) 31 | jwplayer_data = self._find_jwplayer_data( 32 | ifs_page, video_id, transform_source=js_to_json) 33 | info_dict = self._parse_jwplayer_data( 34 | jwplayer_data, video_id, require_title=False, base_url=iframe_url) 35 | 36 | info_dict.update({ 37 | 'id': video_id, 38 | 'title': clean_html(get_element_by_class( 39 | 'field-name-field-podnazev', webpage)), 40 | 'description': clean_html(get_element_by_class( 41 | 'field-name-body', webpage)), 42 | 'series': clean_html(get_element_by_class('title', webpage)) 43 | }) 44 | 45 | return info_dict 46 | -------------------------------------------------------------------------------- /yt_dlp/extractor/historicfilms.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import parse_duration 3 | 4 | 5 | class HistoricFilmsIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P\d+)' 7 | _TEST = { 8 | 'url': 'http://www.historicfilms.com/tapes/4728', 9 | 'md5': 'd4a437aec45d8d796a38a215db064e9a', 10 | 'info_dict': { 11 | 'id': '4728', 12 | 'ext': 'mov', 13 | 'title': 'Historic Films: GP-7', 14 | 'description': 'md5:1a86a0f3ac54024e419aba97210d959a', 15 | 'thumbnail': r're:^https?://.*\.jpg$', 16 | 'duration': 
2096, 17 | }, 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | tape_id = self._search_regex( 26 | [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'], 27 | webpage, 'tape id') 28 | 29 | title = self._og_search_title(webpage) 30 | description = self._og_search_description(webpage) 31 | thumbnail = self._html_search_meta( 32 | 'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage) 33 | duration = parse_duration(self._html_search_meta( 34 | 'duration', webpage, 'duration')) 35 | 36 | video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id) 37 | 38 | return { 39 | 'id': video_id, 40 | 'url': video_url, 41 | 'title': title, 42 | 'description': description, 43 | 'thumbnail': thumbnail, 44 | 'duration': duration, 45 | } 46 | -------------------------------------------------------------------------------- /yt_dlp/extractor/aliexpress.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..compat import compat_str 3 | from ..utils import ( 4 | float_or_none, 5 | try_get, 6 | ) 7 | 8 | 9 | class AliExpressLiveIE(InfoExtractor): 10 | _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P\d+)' 11 | _TEST = { 12 | 'url': 'https://live.aliexpress.com/live/2800002704436634', 13 | 'md5': 'e729e25d47c5e557f2630eaf99b740a5', 14 | 'info_dict': { 15 | 'id': '2800002704436634', 16 | 'ext': 'mp4', 17 | 'title': 'CASIMA7.22', 18 | 'thumbnail': r're:https?://.*\.jpg', 19 | 'uploader': 'CASIMA Official Store', 20 | 'timestamp': 1500717600, 21 | 'upload_date': '20170722', 22 | }, 23 | } 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | 28 | webpage = self._download_webpage(url, video_id) 29 | 30 | data = self._parse_json( 31 | self._search_regex( 32 | r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var', 33 | webpage, 'runParams'), 34 | 
video_id) 35 | 36 | title = data['title'] 37 | 38 | formats = self._extract_m3u8_formats( 39 | data['replyStreamUrl'], video_id, 'mp4', 40 | entry_protocol='m3u8_native', m3u8_id='hls') 41 | 42 | return { 43 | 'id': video_id, 44 | 'title': title, 45 | 'thumbnail': data.get('coverUrl'), 46 | 'uploader': try_get( 47 | data, lambda x: x['followBar']['name'], compat_str), 48 | 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 49 | 'formats': formats, 50 | } 51 | -------------------------------------------------------------------------------- /yt_dlp/extractor/sztvhu.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class SztvHuIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P[0-9]+)' 6 | _TEST = { 7 | 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909', 8 | 'md5': 'a6df607b11fb07d0e9f2ad94613375cb', 9 | 'info_dict': { 10 | 'id': '20130909', 11 | 'ext': 'mp4', 12 | 'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren', 13 | 'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. 
A PET...', 14 | }, 15 | } 16 | 17 | def _real_extract(self, url): 18 | video_id = self._match_id(url) 19 | webpage = self._download_webpage(url, video_id) 20 | video_file = self._search_regex( 21 | r'file: "...:(.*?)",', webpage, 'video file') 22 | title = self._html_search_regex( 23 | r'', 27 | webpage, 'video description', fatal=False) 28 | thumbnail = self._og_search_thumbnail(webpage) 29 | 30 | video_url = 'http://media.sztv.hu/vod/' + video_file 31 | 32 | return { 33 | 'id': video_id, 34 | 'url': video_url, 35 | 'title': title, 36 | 'description': description, 37 | 'thumbnail': thumbnail, 38 | } 39 | -------------------------------------------------------------------------------- /yt_dlp/extractor/behindkink.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import url_basename 3 | 4 | 5 | class BehindKinkIE(InfoExtractor): 6 | _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^/#?_]+)' 7 | _TEST = { 8 | 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', 9 | 'md5': '507b57d8fdcd75a41a9a7bdb7989c762', 10 | 'info_dict': { 11 | 'id': '37127', 12 | 'ext': 'mp4', 13 | 'title': 'What are you passionate about – Marley Blaze', 14 | 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', 15 | 'upload_date': '20141205', 16 | 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 17 | 'age_limit': 18, 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | mobj = self._match_valid_url(url) 23 | display_id = mobj.group('id') 24 | 25 | webpage = self._download_webpage(url, display_id) 26 | 27 | video_url = self._search_regex( 28 | r'[^/?#]+)' 7 | _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1'] 8 | _TESTS = [{ 9 | # from 
http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar 10 | 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', 11 | 'md5': '1db2b7249ce383d6be96499006e951fc', 12 | 'info_dict': { 13 | 'id': 'RufMcytHDolTH1MuKHY9Fw', 14 | 'ext': 'mp4', 15 | 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', 16 | } 17 | }, { 18 | 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw', 19 | 'only_matching': True, 20 | }] 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage(url, video_id) 25 | 26 | title = unescapeHTML(self._og_search_title(webpage)) 27 | hidden_inputs = self._hidden_inputs(webpage) 28 | 29 | formats = self._extract_wowza_formats( 30 | hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) 31 | 32 | return { 33 | 'id': video_id, 34 | 'title': title, 35 | 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage), 36 | 'formats': formats, 37 | } 38 | -------------------------------------------------------------------------------- /test/test_cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Allow direct execution 4 | import os 5 | import sys 6 | import unittest 7 | 8 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | 11 | import shutil 12 | 13 | from test.helper import FakeYDL 14 | from yt_dlp.cache import Cache 15 | 16 | 17 | def _is_empty(d): 18 | return not bool(os.listdir(d)) 19 | 20 | 21 | def _mkdir(d): 22 | if not os.path.exists(d): 23 | os.mkdir(d) 24 | 25 | 26 | class TestCache(unittest.TestCase): 27 | def setUp(self): 28 | TEST_DIR = os.path.dirname(os.path.abspath(__file__)) 29 | TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') 30 | _mkdir(TESTDATA_DIR) 31 | 
self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') 32 | self.tearDown() 33 | 34 | def tearDown(self): 35 | if os.path.exists(self.test_dir): 36 | shutil.rmtree(self.test_dir) 37 | 38 | def test_cache(self): 39 | ydl = FakeYDL({ 40 | 'cachedir': self.test_dir, 41 | }) 42 | c = Cache(ydl) 43 | obj = {'x': 1, 'y': ['ä', '\\a', True]} 44 | self.assertEqual(c.load('test_cache', 'k.'), None) 45 | c.store('test_cache', 'k.', obj) 46 | self.assertEqual(c.load('test_cache', 'k2'), None) 47 | self.assertFalse(_is_empty(self.test_dir)) 48 | self.assertEqual(c.load('test_cache', 'k.'), obj) 49 | self.assertEqual(c.load('test_cache', 'y'), None) 50 | self.assertEqual(c.load('test_cache2', 'k.'), None) 51 | c.remove() 52 | self.assertFalse(os.path.exists(self.test_dir)) 53 | self.assertEqual(c.load('test_cache', 'k.'), None) 54 | 55 | 56 | if __name__ == '__main__': 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /public.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP PUBLIC KEY BLOCK----- 2 | 3 | mQINBGP78C4BEAD0rF9zjGPAt0thlt5C1ebzccAVX7Nb1v+eqQjk+WEZdTETVCg3 4 | WAM5ngArlHdm/fZqzUgO+pAYrB60GKeg7ffUDf+S0XFKEZdeRLYeAaqqKhSibVal 5 | DjvOBOztu3W607HLETQAqA7wTPuIt2WqmpL60NIcyr27LxqmgdN3mNvZ2iLO+bP0 6 | nKR/C+PgE9H4ytywDa12zMx6PmZCnVOOOu6XZEFmdUxxdQ9fFDqd9LcBKY2LDOcS 7 | Yo1saY0YWiZWHtzVoZu1kOzjnS5Fjq/yBHJLImDH7pNxHm7s/PnaurpmQFtDFruk 8 | t+2lhDnpKUmGr/I/3IHqH/X+9nPoS4uiqQ5HpblB8BK+4WfpaiEg75LnvuOPfZIP 9 | KYyXa/0A7QojMwgOrD88ozT+VCkKkkJ+ijXZ7gHNjmcBaUdKK7fDIEOYI63Lyc6Q 10 | WkGQTigFffSUXWHDCO9aXNhP3ejqFWgGMtCUsrbkcJkWuWY7q5ARy/05HbSM3K4D 11 | U9eqtnxmiV1WQ8nXuI9JgJQRvh5PTkny5LtxqzcmqvWO9TjHBbrs14BPEO9fcXxK 12 | L/CFBbzXDSvvAgArdqqlMoncQ/yicTlfL6qzJ8EKFiqW14QMTdAn6SuuZTodXCTi 13 | InwoT7WjjuFPKKdvfH1GP4bnqdzTnzLxCSDIEtfyfPsIX+9GI7Jkk/zZjQARAQAB 14 | tDdTaW1vbiBTYXdpY2tpICh5dC1kbHAgc2lnbmluZyBrZXkpIDxjb250YWN0QGdy 15 | 
dWI0ay54eXo+iQJOBBMBCgA4FiEErAy75oSNaoc0ZK9OV89lkztadYEFAmP78C4C 16 | GwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQV89lkztadYEVqQ//cW7TxhXg 17 | 7Xbh2EZQzXml0egn6j8QaV9KzGragMiShrlvTO2zXfLXqyizrFP4AspgjSn/4NrI 18 | 8mluom+Yi+qr7DXT4BjQqIM9y3AjwZPdywe912Lxcw52NNoPZCm24I9T7ySc8lmR 19 | FQvZC0w4H/VTNj/2lgJ1dwMflpwvNRiWa5YzcFGlCUeDIPskLx9++AJE+xwU3LYm 20 | jQQsPBqpHHiTBEJzMLl+rfd9Fg4N+QNzpFkTDW3EPerLuvJniSBBwZthqxeAtw4M 21 | UiAXh6JvCc2hJkKCoygRfM281MeolvmsGNyQm+axlB0vyldiPP6BnaRgZlx+l6MU 22 | cPqgHblb7RW5j9lfr6OYL7SceBIHNv0CFrt1OnkGo/tVMwcs8LH3Ae4a7UJlIceL 23 | V54aRxSsZU7w4iX+PB79BWkEsQzwKrUuJVOeL4UDwWajp75OFaUqbS/slDDVXvK5 24 | OIeuth3mA/adjdvgjPxhRQjA3l69rRWIJDrqBSHldmRsnX6cvXTDy8wSXZgy51lP 25 | m4IVLHnCy9m4SaGGoAsfTZS0cC9FgjUIyTyrq9M67wOMpUxnuB0aRZgJE1DsI23E 26 | qdvcSNVlO+39xM/KPWUEh6b83wMn88QeW+DCVGWACQq5N3YdPnAJa50617fGbY6I 27 | gXIoRHXkDqe23PZ/jURYCv0sjVtjPoVC+bg= 28 | =bJkn 29 | -----END PGP PUBLIC KEY BLOCK----- 30 | -------------------------------------------------------------------------------- /test/testdata/mpd/float_duration.mpd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /yt_dlp/extractor/webcamerapl.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class WebcameraplIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?P[\w-]+)\.webcamera\.pl' 8 | _TESTS = [{ 9 | 'url': 'https://warszawa-plac-zamkowy.webcamera.pl', 10 | 'info_dict': { 11 | 'id': 'warszawa-plac-zamkowy', 12 | 'ext': 'mp4', 13 | 'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 14 | 'live_status': 'is_live', 15 | } 16 | }, { 17 | 'url': 'https://gdansk-stare-miasto.webcamera.pl/', 18 | 'info_dict': { 19 | 'id': 'gdansk-stare-miasto', 20 | 'ext': 'mp4', 21 | 'title': 
r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 22 | 'live_status': 'is_live', 23 | } 24 | }] 25 | 26 | def _real_extract(self, url): 27 | video_id = self._match_id(url) 28 | webpage = self._download_webpage(url, video_id) 29 | 30 | rot13_m3u8_url = self._search_regex(r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"', 31 | webpage, 'm3u8 url', default=None) 32 | if not rot13_m3u8_url: 33 | self.raise_no_formats('No video/audio found at the provided url', expected=True) 34 | 35 | m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13') 36 | formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, live=True) 37 | 38 | return { 39 | 'id': video_id, 40 | 'title': self._html_search_regex(r']*>([^>]+)

    ', webpage, 'title'), 41 | 'formats': formats, 42 | 'subtitles': subtitles, 43 | 'is_live': True, 44 | } 45 | -------------------------------------------------------------------------------- /yt_dlp/extractor/dreisat.py: -------------------------------------------------------------------------------- 1 | from .zdf import ZDFIE 2 | 3 | 4 | class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE 5 | IE_NAME = '3sat' 6 | _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P[^/?#&]+)\.html' 7 | _TESTS = [{ 8 | # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html 9 | 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html', 10 | 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', 11 | 'info_dict': { 12 | 'id': '141007_ab18_10wochensommer_film', 13 | 'ext': 'mp4', 14 | 'title': 'Ab 18! - 10 Wochen Sommer', 15 | 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', 16 | 'duration': 2660, 17 | 'timestamp': 1608604200, 18 | 'upload_date': '20201222', 19 | }, 20 | }, { 21 | 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html', 22 | 'info_dict': { 23 | 'id': '140913_sendung_schweizweit', 24 | 'ext': 'mp4', 25 | 'title': 'Waidmannsheil', 26 | 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 27 | 'timestamp': 1410623100, 28 | 'upload_date': '20140913' 29 | }, 30 | 'params': { 31 | 'skip_download': True, 32 | } 33 | }, { 34 | # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html 35 | 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', 36 | 'only_matching': True, 37 | }, { 38 | # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids 39 | 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html', 40 | 'only_matching': True, 41 | }] 42 | -------------------------------------------------------------------------------- /yt_dlp/postprocessor/exec.py: -------------------------------------------------------------------------------- 1 | import 
subprocess 2 | 3 | from .common import PostProcessor 4 | from ..compat import compat_shlex_quote 5 | from ..utils import PostProcessingError, encodeArgument, variadic 6 | 7 | 8 | class ExecPP(PostProcessor): 9 | 10 | def __init__(self, downloader, exec_cmd): 11 | PostProcessor.__init__(self, downloader) 12 | self.exec_cmd = variadic(exec_cmd) 13 | 14 | def parse_cmd(self, cmd, info): 15 | tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info) 16 | if tmpl_dict: # if there are no replacements, tmpl_dict = {} 17 | return self._downloader.escape_outtmpl(tmpl) % tmpl_dict 18 | 19 | filepath = info.get('filepath', info.get('_filename')) 20 | # If video, and no replacements are found, replace {} for backward compatibility 21 | if filepath: 22 | if '{}' not in cmd: 23 | cmd += ' {}' 24 | cmd = cmd.replace('{}', compat_shlex_quote(filepath)) 25 | return cmd 26 | 27 | def run(self, info): 28 | for tmpl in self.exec_cmd: 29 | cmd = self.parse_cmd(tmpl, info) 30 | self.to_screen('Executing command: %s' % cmd) 31 | retCode = subprocess.call(encodeArgument(cmd), shell=True) 32 | if retCode != 0: 33 | raise PostProcessingError('Command returned error code %d' % retCode) 34 | return [], info 35 | 36 | 37 | # Deprecated 38 | class ExecAfterDownloadPP(ExecPP): 39 | def __init__(self, *args, **kwargs): 40 | super().__init__(*args, **kwargs) 41 | self.deprecation_warning( 42 | 'yt_dlp.postprocessor.ExecAfterDownloadPP is deprecated ' 43 | 'and may be removed in a future version. 
Use yt_dlp.postprocessor.ExecPP instead') 44 | -------------------------------------------------------------------------------- /yt_dlp/extractor/lenta.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class LentaIE(InfoExtractor): 5 | _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P[^/?#&]+)' 6 | _TESTS = [{ 7 | 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/', 8 | 'info_dict': { 9 | 'id': '964400', 10 | 'ext': 'mp4', 11 | 'title': 'Надежду Савченко задержали', 12 | 'thumbnail': r're:^https?://.*\.jpg$', 13 | 'duration': 61, 14 | 'view_count': int, 15 | }, 16 | 'params': { 17 | 'skip_download': True, 18 | }, 19 | }, { 20 | # EaglePlatform iframe embed 21 | 'url': 'http://lenta.ru/news/2015/03/06/navalny/', 22 | 'info_dict': { 23 | 'id': '227304', 24 | 'ext': 'mp4', 25 | 'title': 'Навальный вышел на свободу', 26 | 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', 27 | 'thumbnail': r're:^https?://.*\.jpg$', 28 | 'duration': 87, 29 | 'view_count': int, 30 | 'age_limit': 0, 31 | }, 32 | 'params': { 33 | 'skip_download': True, 34 | }, 35 | }] 36 | 37 | def _real_extract(self, url): 38 | display_id = self._match_id(url) 39 | 40 | webpage = self._download_webpage(url, display_id) 41 | 42 | video_id = self._search_regex( 43 | r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id', 44 | default=None) 45 | if video_id: 46 | return self.url_result( 47 | 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id, 48 | ie='EaglePlatform', video_id=video_id) 49 | 50 | return self.url_result(url, ie='Generic') 51 | -------------------------------------------------------------------------------- /yt_dlp/extractor/weiqitv.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | 3 | 4 | class WeiqiTVIE(InfoExtractor): 5 | IE_DESC = 'WQTV' 6 | _VALID_URL = 
r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P[A-Za-z0-9]+)' 7 | 8 | _TESTS = [{ 9 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', 10 | 'md5': '26450599afd64c513bc77030ad15db44', 11 | 'info_dict': { 12 | 'id': '53c744f09874f0e76a8b46f3', 13 | 'ext': 'mp4', 14 | 'title': '2013年度盘点', 15 | }, 16 | }, { 17 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569', 18 | 'info_dict': { 19 | 'id': '567379a2d4c36cca518b4569', 20 | 'ext': 'mp4', 21 | 'title': '民国围棋史', 22 | }, 23 | }, { 24 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567', 25 | 'info_dict': { 26 | 'id': '5430220a9874f088658b4567', 27 | 'ext': 'mp4', 28 | 'title': '二路托过的手段和运用', 29 | }, 30 | }] 31 | 32 | def _real_extract(self, url): 33 | media_id = self._match_id(url) 34 | page = self._download_webpage(url, media_id) 35 | 36 | info_json_str = self._search_regex( 37 | r'var\s+video\s*=\s*(.+});', page, 'info json str') 38 | info_json = self._parse_json(info_json_str, media_id) 39 | 40 | letvcloud_url = self._search_regex( 41 | r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url') 42 | 43 | return { 44 | '_type': 'url_transparent', 45 | 'ie_key': 'LetvCloud', 46 | 'url': letvcloud_url, 47 | 'title': info_json['name'], 48 | 'id': media_id, 49 | } 50 | -------------------------------------------------------------------------------- /yt_dlp/extractor/megaphone.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import js_to_json 3 | 4 | 5 | class MegaphoneIE(InfoExtractor): 6 | IE_NAME = 'megaphone.fm' 7 | IE_DESC = 'megaphone.fm embedded players' 8 | _VALID_URL = r'https://player\.megaphone\.fm/(?P[A-Z0-9]+)' 9 | _EMBED_REGEX = [rf']*?\ssrc=["\'](?P{_VALID_URL})'] 10 | _TEST = { 11 | 'url': 'https://player.megaphone.fm/GLT9749789991?"', 12 | 'md5': '4816a0de523eb3e972dc0dda2c191f96', 13 | 'info_dict': { 14 | 
'id': 'GLT9749789991', 15 | 'ext': 'mp3', 16 | 'title': '#97 What Kind Of Idiot Gets Phished?', 17 | 'thumbnail': r're:^https://.*\.png.*$', 18 | 'duration': 1776.26375, 19 | 'author': 'Reply All', 20 | }, 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | title = self._og_search_property('audio:title', webpage) 28 | author = self._og_search_property('audio:artist', webpage) 29 | thumbnail = self._og_search_thumbnail(webpage) 30 | 31 | episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON') 32 | episode_data = self._parse_json(episode_json, video_id, js_to_json) 33 | video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:') 34 | 35 | formats = [{ 36 | 'url': video_url, 37 | }] 38 | 39 | return { 40 | 'id': video_id, 41 | 'thumbnail': thumbnail, 42 | 'title': title, 43 | 'author': author, 44 | 'duration': episode_data['duration'], 45 | 'formats': formats, 46 | } 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Config 2 | *.conf 3 | cookies 4 | *cookies.txt 5 | .netrc 6 | 7 | # Downloaded 8 | *.annotations.xml 9 | *.aria2 10 | *.description 11 | *.dump 12 | *.frag 13 | *.frag.aria2 14 | *.frag.urls 15 | *.info.json 16 | *.live_chat.json 17 | *.meta 18 | *.part* 19 | *.tmp 20 | *.temp 21 | *.unknown_video 22 | *.ytdl 23 | .cache/ 24 | 25 | *.3gp 26 | *.ape 27 | *.ass 28 | *.avi 29 | *.desktop 30 | *.f4v 31 | *.flac 32 | *.flv 33 | *.gif 34 | *.jpeg 35 | *.jpg 36 | *.m4a 37 | *.m4v 38 | *.mhtml 39 | *.mkv 40 | *.mov 41 | *.mp3 42 | *.mp4 43 | *.mpga 44 | *.oga 45 | *.ogg 46 | *.opus 47 | *.png 48 | *.sbv 49 | *.srt 50 | *.swf 51 | *.swp 52 | *.tt 53 | *.ttml 54 | *.url 55 | *.vtt 56 | *.wav 57 | *.webloc 58 | *.webm 59 | *.webp 60 | 61 | # Allow config/media files in testdata 62 | 
!test/** 63 | 64 | # Python 65 | *.pyc 66 | *.pyo 67 | .pytest_cache 68 | wine-py2exe/ 69 | py2exe.log 70 | build/ 71 | dist/ 72 | zip/ 73 | tmp/ 74 | venv/ 75 | .venv/ 76 | completions/ 77 | 78 | # Misc 79 | *~ 80 | *.DS_Store 81 | *.kate-swp 82 | MANIFEST 83 | test/local_parameters.json 84 | .coverage 85 | cover/ 86 | secrets/ 87 | updates_key.pem 88 | *.egg-info 89 | .tox 90 | *.class 91 | *.isorted 92 | *.stackdump 93 | 94 | # Generated 95 | AUTHORS 96 | README.txt 97 | .mailmap 98 | *.1 99 | *.bash-completion 100 | *.fish 101 | *.tar.gz 102 | *.zsh 103 | *.spec 104 | test/testdata/sigs/player-*.js 105 | 106 | # Binary 107 | /youtube-dl 108 | /youtube-dlc 109 | /yt-dlp 110 | yt-dlp.zip 111 | *.exe 112 | 113 | # Text Editor / IDE 114 | .idea 115 | *.iml 116 | .vscode 117 | *.sublime-* 118 | *.code-workspace 119 | 120 | # Lazy extractors 121 | */extractor/lazy_extractors.py 122 | 123 | # Plugins 124 | ytdlp_plugins/ 125 | yt-dlp-plugins 126 | -------------------------------------------------------------------------------- /yt_dlp/extractor/reverbnation.py: -------------------------------------------------------------------------------- 1 | from .common import InfoExtractor 2 | from ..utils import ( 3 | qualities, 4 | str_or_none, 5 | ) 6 | 7 | 8 | class ReverbNationIE(InfoExtractor): 9 | _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' 10 | _TESTS = [{ 11 | 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 12 | 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 13 | 'info_dict': { 14 | 'id': '16965047', 15 | 'ext': 'mp3', 16 | 'title': 'MONA LISA', 17 | 'uploader': 'ALKILADOS', 18 | 'uploader_id': '216429', 19 | 'thumbnail': r're:^https?://.*\.jpg', 20 | }, 21 | }] 22 | 23 | def _real_extract(self, url): 24 | song_id = self._match_id(url) 25 | 26 | api_res = self._download_json( 27 | 'https://api.reverbnation.com/song/%s' % song_id, 28 | song_id, 29 | note='Downloading information of song %s' % song_id 30 | ) 31 | 32 | 
THUMBNAILS = ('thumbnail', 'image') 33 | quality = qualities(THUMBNAILS) 34 | thumbnails = [] 35 | for thumb_key in THUMBNAILS: 36 | if api_res.get(thumb_key): 37 | thumbnails.append({ 38 | 'url': api_res[thumb_key], 39 | 'preference': quality(thumb_key) 40 | }) 41 | 42 | return { 43 | 'id': song_id, 44 | 'title': api_res['name'], 45 | 'url': api_res['url'], 46 | 'uploader': api_res.get('artist', {}).get('name'), 47 | 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 48 | 'thumbnails': thumbnails, 49 | 'ext': 'mp3', 50 | 'vcodec': 'none', 51 | } 52 | --------------------------------------------------------------------------------