├── test ├── __init__.py ├── swftests │ ├── .gitignore │ ├── EqualsOperator.as │ ├── DictCall.as │ ├── ConstantInt.as │ ├── StringBasics.as │ ├── StringConversion.as │ ├── StaticAssignment.as │ ├── ClassConstruction.as │ ├── StringCharCodeAt.as │ ├── LocalVars.as │ ├── StaticRetrieval.as │ ├── ClassCall.as │ ├── PrivateCall.as │ ├── ConstArrayAccess.as │ ├── PrivateVoidCall.as │ ├── MemberAssignment.as │ ├── ArrayAccess.as │ └── NeOperator.as ├── test_postprocessors.py ├── test_netrc.py ├── test_update.py ├── parameters.json ├── test_iqiyi_sdk_interpreter.py ├── test_execution.py ├── test_age_restriction.py ├── versions.json └── test_cache.py ├── docs ├── .gitignore └── index.rst ├── youtube_dl ├── version.py ├── __main__.py ├── extractor │ ├── embedly.py │ ├── engadget.py │ ├── lci.py │ ├── m6.py │ ├── anitube.py │ ├── myvidster.py │ ├── ku6.py │ ├── videodetective.py │ ├── cliprs.py │ ├── cbssports.py │ ├── ina.py │ ├── ebaumsworld.py │ ├── makertv.py │ ├── formula1.py │ ├── teachingchannel.py │ ├── skysports.py │ ├── tutv.py │ ├── people.py │ ├── cnbc.py │ ├── nerdcubed.py │ ├── savefrom.py │ ├── gameinformer.py │ ├── learnr.py │ ├── commonprotocols.py │ ├── fusion.py │ ├── nuevo.py │ ├── lovehomeporn.py │ ├── gputechconf.py │ ├── freevideo.py │ ├── restudy.py │ ├── hentaistigma.py │ ├── tvland.py │ ├── freespeech.py │ ├── sonyliv.py │ ├── defense.py │ ├── rottentomatoes.py │ ├── youjizz.py │ ├── aljazeera.py │ ├── __init__.py │ ├── criterion.py │ ├── macgamestore.py │ ├── dropbox.py │ ├── keek.py │ ├── dreisat.py │ ├── hark.py │ ├── foxsports.py │ ├── echomsk.py │ ├── telequebec.py │ ├── lemonde.py │ ├── spike.py │ ├── bild.py │ ├── screencastomatic.py │ ├── helsinki.py │ ├── howcast.py │ ├── thestar.py │ ├── freesound.py │ ├── vodplatform.py │ ├── academicearth.py │ ├── biqle.py │ ├── slutload.py │ ├── commonmistakes.py │ ├── sportschau.py │ ├── moviezine.py │ ├── newgrounds.py │ ├── reverbnation.py │ ├── fczenit.py │ ├── ro220.py │ ├── xbef.py │ ├── miaopai.py │ ├── nintendo.py │ ├── vidzi.py │ ├── tv3.py │ ├── ehow.py │ ├── phoenix.py │ ├── ruleporn.py │ ├── odatv.py │ ├── oktoberfesttv.py │ ├── parliamentliveuk.py │ ├── glide.py │ ├── thisamericanlife.py │ ├── ruhd.py │ ├── videopremium.py │ ├── makerschannel.py │ ├── goshgay.py │ ├── rmcdecouverte.py │ ├── yourupload.py │ ├── fktv.py │ ├── historicfilms.py │ ├── xnxx.py │ ├── rudo.py │ ├── streetvoice.py │ ├── tlc.py │ ├── sztvhu.py │ ├── behindkink.py │ ├── tvnoe.py │ ├── footyroom.py │ ├── weiqitv.py │ ├── ir90tv.py │ ├── cinchcast.py │ ├── thescene.py │ ├── localnews8.py │ ├── charlierose.py │ ├── morningstar.py │ └── kankan.py ├── postprocessor │ ├── execafterdownload.py │ ├── __init__.py │ └── metadatafromtitle.py └── downloader │ ├── __init__.py │ └── rtsp.py ├── devscripts ├── SizeOfImage.patch ├── SizeOfImage_w.patch ├── fish-completion.in ├── posix-locale.sh ├── lazy_load_template.py ├── gh-pages │ ├── update-copyright.py │ ├── sign-versions.py │ ├── generate-download.py │ ├── update-sites.py │ └── add-version.py ├── make_readme.py ├── zsh-completion.in ├── make_issue_template.py ├── make_contributing.py ├── bash-completion.in ├── bash-completion.py ├── generate_aes_testdata.py ├── make_supportedsites.py ├── show-downloads-statistics.py ├── zsh-completion.py └── fish-completion.py ├── bin └── youtube-dl ├── MANIFEST.in ├── setup.cfg ├── .travis.yml ├── .gitignore ├── tox.ini ├── youtube-dl.plugin.zsh ├── .github └── PULL_REQUEST_TEMPLATE.md └── LICENSE /test/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /test/swftests/.gitignore: -------------------------------------------------------------------------------- 1 | *.swf 2 | -------------------------------------------------------------------------------- /youtube_dl/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | __version__ = '2016.09.19' 4 | -------------------------------------------------------------------------------- /devscripts/SizeOfImage.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monkey-wenjun/youtube-dl/master/devscripts/SizeOfImage.patch -------------------------------------------------------------------------------- /devscripts/SizeOfImage_w.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monkey-wenjun/youtube-dl/master/devscripts/SizeOfImage_w.patch -------------------------------------------------------------------------------- /bin/youtube-dl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import youtube_dl 4 | 5 | if __name__ == '__main__': 6 | youtube_dl.main() 7 | -------------------------------------------------------------------------------- /devscripts/fish-completion.in: -------------------------------------------------------------------------------- 1 | 2 | {{commands}} 3 | 4 | 5 | complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include test/*.py 3 | include test/*.json 4 | include youtube-dl.bash-completion 5 | include youtube-dl.fish 6 | include youtube-dl.1 7 | recursive-include docs Makefile conf.py *.rst 8 | -------------------------------------------------------------------------------- /devscripts/posix-locale.sh: -------------------------------------------------------------------------------- 1 | 2 | # source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point) 3 | 4 | export LC_ALL=POSIX 5 | export LANG=POSIX 6 | export LANGUAGE=POSIX 7 | -------------------------------------------------------------------------------- /test/swftests/EqualsOperator.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: false 3 | 4 | package { 5 | public class EqualsOperator { 6 | public static function main():Boolean{ 7 | return 1 == 2; 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /test/swftests/DictCall.as: -------------------------------------------------------------------------------- 1 | // input: [{"x": 1, "y": 2}] 2 | // output: 3 3 | 4 | package { 5 | public class DictCall { 6 | public static function main(d:Object):int{ 7 | return d.x + d.y; 8 | } 9 | } 10 | } 11 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = True 3 | 4 | [flake8] 5 | exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git 6 | ignore = E402,E501,E731 7 | -------------------------------------------------------------------------------- /test/swftests/ConstantInt.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 2 3 | 4 | package { 5 | public class ConstantInt { 6 | private static const x:int = 2; 7 | 8 | public static function main():int{ 9 | return x; 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /test/swftests/StringBasics.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 3 3 | 4 | package { 5 | public class StringBasics { 6 | public static function main():int{ 7 | var s:String = "abc"; 8 | return s.length; 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test/swftests/StringConversion.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 2 3 | 4 | package { 5 | public class StringConversion { 6 | public static function main():int{ 7 | var s:String = String(99); 8 | return s.length; 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test/swftests/StaticAssignment.as: -------------------------------------------------------------------------------- 1 | // input: [1] 2 | // output: 1 3 | 4 | package { 5 | public class StaticAssignment { 6 | public static var v:int; 7 | 8 | public static function main(a:int):int{ 9 | v = a; 10 | return v; 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /test/swftests/ClassConstruction.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 0 3 | 4 | package { 5 | public class ClassConstruction { 6 | public static function main():int{ 7 | var f:Foo = new Foo(); 8 | return 0; 9 | } 10 | } 11 | } 12 | 13 | class Foo { 14 | 15 | } 16 | -------------------------------------------------------------------------------- /test/swftests/StringCharCodeAt.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 9897 3 | 4 | package { 5 | public class StringCharCodeAt { 6 | public static function main():int{ 7 | var s:String = "abc"; 8 | return s.charCodeAt(1) * 100 + s.charCodeAt(); 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test/swftests/LocalVars.as: -------------------------------------------------------------------------------- 1 | // input: [1, 2] 2 | // output: 3 3 | 4 | package { 5 | public class LocalVars { 6 | public static function main(a:int, b:int):int{ 7 | var c:int = a + b + b; 8 | var d:int = c - b; 9 | var e:int = d; 10 | return e; 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /test/swftests/StaticRetrieval.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 1 3 | 4 | 
package { 5 | public class StaticRetrieval { 6 | public static var v:int; 7 | 8 | public static function main():int{ 9 | if (v) { 10 | return 0; 11 | } else { 12 | return 1; 13 | } 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /test/swftests/ClassCall.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 121 3 | 4 | package { 5 | public class ClassCall { 6 | public static function main():int{ 7 | var f:OtherClass = new OtherClass(); 8 | return f.func(100,20); 9 | } 10 | } 11 | } 12 | 13 | class OtherClass { 14 | public function func(x: int, y: int):int { 15 | return x+y+1; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.6" 4 | - "2.7" 5 | - "3.2" 6 | - "3.3" 7 | - "3.4" 8 | - "3.5" 9 | sudo: false 10 | script: nosetests test --verbose 11 | notifications: 12 | email: 13 | - filippo.valsorda@gmail.com 14 | - yasoob.khld@gmail.com 15 | # irc: 16 | # channels: 17 | # - "irc.freenode.org#youtube-dl" 18 | # skip_join: true 19 | -------------------------------------------------------------------------------- /test/swftests/PrivateCall.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 9 3 | 4 | package { 5 | public class PrivateCall { 6 | public static function main():int{ 7 | var f:OtherClass = new OtherClass(); 8 | return f.func(); 9 | } 10 | } 11 | } 12 | 13 | class OtherClass { 14 | private function pf():int { 15 | return 9; 16 | } 17 | 18 | public function func():int { 19 | return this.pf(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/swftests/ConstArrayAccess.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 4 3 | 4 | package { 5 | public class ConstArrayAccess { 6 | private static const x:int = 2; 7 | private static const ar:Array = ["42", "3411"]; 8 | 9 | public static function main():int{ 10 | var c:ConstArrayAccess = new ConstArrayAccess(); 11 | return c.f(); 12 | } 13 | 14 | public function f(): int { 15 | return ar[1].length; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /test/swftests/PrivateVoidCall.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 9 3 | 4 | package { 5 | public class PrivateVoidCall { 6 | public static function main():int{ 7 | var f:OtherClass = new OtherClass(); 8 | f.func(); 9 | return 9; 10 | } 11 | } 12 | } 13 | 14 | class OtherClass { 15 | private function pf():void { 16 | ; 17 | } 18 | 19 | public function func():void { 20 | this.pf(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /test/swftests/MemberAssignment.as: -------------------------------------------------------------------------------- 1 | // input: [1] 2 | // output: 2 3 | 4 | package { 5 | public class MemberAssignment { 6 | public var v:int; 7 | 8 | public function g():int { 9 | return this.v; 10 | } 11 | 12 | public function f(a:int):int{ 13 | this.v = a; 14 | return this.v + this.g(); 15 | } 16 | 17 | public static function main(a:int): int { 18 | var v:MemberAssignment = new MemberAssignment(); 19 | return v.f(a); 20 
| } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /test/swftests/ArrayAccess.as: -------------------------------------------------------------------------------- 1 | // input: [["a", "b", "c", "d"]] 2 | // output: ["c", "b", "a", "d"] 3 | 4 | package { 5 | public class ArrayAccess { 6 | public static function main(ar:Array):Array { 7 | var aa:ArrayAccess = new ArrayAccess(); 8 | return aa.f(ar, 2); 9 | } 10 | 11 | private function f(ar:Array, num:Number):Array{ 12 | var x:String = ar[0]; 13 | var y:String = ar[num % ar.length]; 14 | ar[0] = y; 15 | ar[num] = x; 16 | return ar; 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /test/swftests/NeOperator.as: -------------------------------------------------------------------------------- 1 | // input: [] 2 | // output: 123 3 | 4 | package { 5 | public class NeOperator { 6 | public static function main(): int { 7 | var res:int = 0; 8 | if (1 != 2) { 9 | res += 3; 10 | } else { 11 | res += 4; 12 | } 13 | if (2 != 2) { 14 | res += 10; 15 | } else { 16 | res += 20; 17 | } 18 | if (9 == 9) { 19 | res += 100; 20 | } 21 | return res; 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /devscripts/lazy_load_template.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | 7 | class LazyLoadExtractor(object): 8 | _module = None 9 | 10 | @classmethod 11 | def ie_key(cls): 12 | return cls.__name__[:-2] 13 | 14 | def __new__(cls, *args, **kwargs): 15 | mod = __import__(cls._module, fromlist=(cls.__name__,)) 16 | real_cls = getattr(mod, cls.__name__) 17 | instance = real_cls.__new__(real_cls) 18 | instance.__init__(*args, **kwargs) 19 | return instance 20 | -------------------------------------------------------------------------------- /youtube_dl/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | # Execute with 5 | # $ python youtube_dl/__main__.py (2.6+) 6 | # $ python -m youtube_dl (2.7+) 7 | 8 | import sys 9 | 10 | if __package__ is None and not hasattr(sys, 'frozen'): 11 | # direct call of __main__.py 12 | import os.path 13 | path = os.path.realpath(os.path.abspath(__file__)) 14 | sys.path.insert(0, os.path.dirname(os.path.dirname(path))) 15 | 16 | import youtube_dl 17 | 18 | if __name__ == '__main__': 19 | youtube_dl.main() 20 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to youtube-dl's documentation! 2 | ====================================== 3 | 4 | *youtube-dl* is a command-line program to download videos from YouTube.com and more sites. 5 | It can also be used in Python code. 6 | 7 | Developer guide 8 | --------------- 9 | 10 | This section contains information for using *youtube-dl* from Python programs. 11 | 12 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | module_guide 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/test_postprocessors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import unicode_literals 4 | 5 | # Allow direct execution 6 | import os 7 | import sys 8 | import unittest 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | from youtube_dl.postprocessor import MetadataFromTitlePP 12 | 13 | 14 | class TestMetadataFromTitle(unittest.TestCase): 15 | def test_format_to_regex(self): 16 | pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s') 17 | self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)') 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.class 4 | *~ 5 | *.DS_Store 6 | wine-py2exe/ 7 | py2exe.log 8 | *.kate-swp 9 | build/ 10 | dist/ 11 | MANIFEST 12 | README.txt 13 | youtube-dl.1 14 | youtube-dl.bash-completion 15 | youtube-dl.fish 16 | youtube_dl/extractor/lazy_extractors.py 17 | youtube-dl 18 | youtube-dl.exe 19 | youtube-dl.tar.gz 20 | .coverage 21 | cover/ 22 | updates_key.pem 23 | *.egg-info 24 | *.srt 25 | *.sbv 26 | *.vtt 27 | *.flv 28 | *.mp4 29 | *.m4a 30 | *.m4v 31 | *.mp3 32 | *.part 33 | *.swp 34 | test/testdata 35 | test/local_parameters.json 36 | .tox 37 | youtube-dl.zsh 38 | 39 | # IntelliJ related files 40 | .idea 41 | *.iml 42 | 43 | tmp/ 44 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26,py27,py33,py34,py35 3 | [testenv] 4 | deps = 5 | nose 6 | coverage 7 | # We need a valid $HOME for test_compat_expanduser 8 | passenv = HOME 9 | defaultargs = test --exclude test_download.py --exclude test_age_restriction.py 10 | --exclude test_subtitles.py --exclude test_write_annotations.py 11 | --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py 12 | --exclude test_socks.py 13 | commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html 14 | # test.test_download:TestDownload.test_NowVideo 15 | -------------------------------------------------------------------------------- /devscripts/gh-pages/update-copyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import with_statement, unicode_literals 5 | 6 | import datetime 7 | import glob 8 | import io # For Python 2 compatibility 9 | import os 10 | import re 11 | 12 | year = str(datetime.datetime.now().year) 13 | for fn in glob.glob('*.html*'): 14 | with io.open(fn, encoding='utf-8') as f: 15 | content = f.read() 16 | newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content) 17 | if content != newc: 18 | tmpFn = fn + '.part' 19 | with io.open(tmpFn, 'wt', encoding='utf-8') as outf: 20 | outf.write(newc) 21 | os.rename(tmpFn, fn) 22 | --------------------------------------------------------------------------------
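As docs/index.rst notes above, *youtube-dl* can also be used from Python code. A minimal embedding sketch of that usage follows; the option values and the example URL are illustrative assumptions, not taken from this repository:

import youtube_dl

# Options mirror the command-line flags; 'outtmpl' controls the output filename template.
ydl_opts = {
    'format': 'best',
    'outtmpl': '%(id)s.%(ext)s',
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    # download() takes a list of video URLs; this URL is only an example.
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])

/test/test_netrc.py: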
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | import sys 6 | import unittest 7 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 8 | 9 | 10 | from youtube_dl.extractor import ( 11 | gen_extractors, 12 | ) 13 | 14 | 15 | class TestNetRc(unittest.TestCase): 16 | def test_netrc_present(self): 17 | for ie in gen_extractors(): 18 | if not hasattr(ie, '_login'): 19 | continue 20 | self.assertTrue( 21 | hasattr(ie, '_NETRC_MACHINE'), 22 | 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) 23 | 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /devscripts/make_readme.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import io 4 | import sys 5 | import re 6 | 7 | README_FILE = 'README.md' 8 | helptext = sys.stdin.read() 9 | 10 | if isinstance(helptext, bytes): 11 | helptext = helptext.decode('utf-8') 12 | 13 | with io.open(README_FILE, encoding='utf-8') as f: 14 | oldreadme = f.read() 15 | 16 | header = oldreadme[:oldreadme.index('# OPTIONS')] 17 | footer = oldreadme[oldreadme.index('# CONFIGURATION'):] 18 | 19 | options = helptext[helptext.index(' General Options:') + 19:] 20 | options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) 21 | options = '# OPTIONS\n' + options + '\n' 22 | 23 | with io.open(README_FILE, 'w', encoding='utf-8') as f: 24 | f.write(header) 25 | f.write(options) 26 | f.write(footer) 27 | -------------------------------------------------------------------------------- /devscripts/zsh-completion.in: -------------------------------------------------------------------------------- 1 | #compdef youtube-dl 2 | 3 | __youtube_dl() { 4 | local curcontext="$curcontext" fileopts diropts cur prev 5 | typeset -A opt_args 6 | fileopts="{{fileopts}}" 7 | diropts="{{diropts}}" 8 | cur=$words[CURRENT] 9 | case $cur in 10 | :) 11 | _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)' 12 | ;; 13 | *) 14 | prev=$words[CURRENT-1] 15 | if [[ ${prev} =~ ${fileopts} ]]; then 16 | _path_files 17 | elif [[ ${prev} =~ ${diropts} ]]; then 18 | _path_files -/ 19 | elif [[ ${prev} == "--recode-video" ]]; then 20 | _arguments '*: :(mp4 flv ogg webm mkv)' 21 | else 22 | _arguments '*: :({{flags}})' 23 | fi 24 | ;; 25 | esac 26 | } 27 | 28 | __youtube_dl -------------------------------------------------------------------------------- /youtube_dl/extractor/embedly.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..compat import compat_urllib_parse_unquote 6 | 7 | 8 | class EmbedlyIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' 10 | _TESTS = [{ 11 | 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', 12 | 'only_matching': True, 13 | }] 14 | 15 | def 
_real_extract(self, url): 16 | return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) 17 | -------------------------------------------------------------------------------- /devscripts/make_issue_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import io 5 | import optparse 6 | 7 | 8 | def main(): 9 | parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') 10 | options, args = parser.parse_args() 11 | if len(args) != 2: 12 | parser.error('Expected an input and an output filename') 13 | 14 | infile, outfile = args 15 | 16 | with io.open(infile, encoding='utf-8') as inf: 17 | issue_template_tmpl = inf.read() 18 | 19 | # Get the version from youtube_dl/version.py without importing the package 20 | exec(compile(open('youtube_dl/version.py').read(), 21 | 'youtube_dl/version.py', 'exec')) 22 | 23 | out = issue_template_tmpl % {'version': locals()['__version__']} 24 | 25 | with io.open(outfile, 'w', encoding='utf-8') as outf: 26 | outf.write(out) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /youtube_dl/extractor/engadget.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class EngadgetIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)' 8 | 9 | _TESTS = [{ 10 | # video with 5min ID 11 | 'url': 'http://www.engadget.com/video/518153925/', 12 | 'md5': 'c6820d4828a5064447a4d9fc73f312c9', 13 | 'info_dict': { 14 | 'id': '518153925', 15 | 'ext': 'mp4', 16 | 'title': 'Samsung Galaxy Tab Pro 8.4 Review', 17 | }, 18 | 'add_ie': ['FiveMin'], 19 | }, { 20 | # video with vidible ID 21 | 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', 22 | 'only_matching': True, 23 | }] 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | return self.url_result('aol-video:%s' % video_id) 28 | -------------------------------------------------------------------------------- /devscripts/make_contributing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import io 5 | import optparse 6 | import re 7 | 8 | 9 | def main(): 10 | parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') 11 | options, args = parser.parse_args() 12 | if len(args) != 2: 13 | parser.error('Expected an input and an output filename') 14 | 15 | infile, outfile = args 16 | 17 | with io.open(infile, encoding='utf-8') as inf: 18 | readme = inf.read() 19 | 20 | bug_text = re.search( 21 | r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) 22 | dev_text = re.search( 23 | r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL', 24 | readme).group(1) 25 | 26 | out = bug_text + dev_text 27 | 28 | with io.open(outfile, 'w', encoding='utf-8') as outf: 29 | outf.write(out) 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /youtube_dl/postprocessor/execafterdownload.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import subprocess 4 | 5 | from .common import PostProcessor 6 | from ..compat import compat_shlex_quote 7 | 
from ..utils import PostProcessingError 8 | 9 | 10 | class ExecAfterDownloadPP(PostProcessor): 11 | def __init__(self, downloader, exec_cmd): 12 | super(ExecAfterDownloadPP, self).__init__(downloader) 13 | self.exec_cmd = exec_cmd 14 | 15 | def run(self, information): 16 | cmd = self.exec_cmd 17 | if '{}' not in cmd: 18 | cmd += ' {}' 19 | 20 | cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) 21 | 22 | self._downloader.to_screen('[exec] Executing command: %s' % cmd) 23 | retCode = subprocess.call(cmd, shell=True) 24 | if retCode != 0: 25 | raise PostProcessingError( 26 | 'Command returned error code %d' % retCode) 27 | 28 | return [], information 29 | -------------------------------------------------------------------------------- /devscripts/bash-completion.in: -------------------------------------------------------------------------------- 1 | __youtube_dl() 2 | { 3 | local cur prev opts fileopts diropts keywords 4 | COMPREPLY=() 5 | cur="${COMP_WORDS[COMP_CWORD]}" 6 | prev="${COMP_WORDS[COMP_CWORD-1]}" 7 | opts="{{flags}}" 8 | keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" 9 | fileopts="-a|--batch-file|--download-archive|--cookies|--load-info" 10 | diropts="--cache-dir" 11 | 12 | if [[ ${prev} =~ ${fileopts} ]]; then 13 | COMPREPLY=( $(compgen -f -- ${cur}) ) 14 | return 0 15 | elif [[ ${prev} =~ ${diropts} ]]; then 16 | COMPREPLY=( $(compgen -d -- ${cur}) ) 17 | return 0 18 | fi 19 | 20 | if [[ ${cur} =~ : ]]; then 21 | COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) 22 | return 0 23 | elif [[ ${cur} == * ]] ; then 24 | COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) 25 | return 0 26 | fi 27 | } 28 | 29 | complete -F __youtube_dl youtube-dl 30 | -------------------------------------------------------------------------------- /devscripts/bash-completion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | from os.path import dirname as dirn 6 | import sys 7 | 8 | sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) 9 | import youtube_dl 10 | 11 | BASH_COMPLETION_FILE = "youtube-dl.bash-completion" 12 | BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" 13 | 14 | 15 | def build_completion(opt_parser): 16 | opts_flag = [] 17 | for group in opt_parser.option_groups: 18 | for option in group.option_list: 19 | # for every long flag 20 | opts_flag.append(option.get_opt_string()) 21 | with open(BASH_COMPLETION_TEMPLATE) as f: 22 | template = f.read() 23 | with open(BASH_COMPLETION_FILE, "w") as f: 24 | # just using the special char 25 | filled_template = template.replace("{{flags}}", " ".join(opts_flag)) 26 | f.write(filled_template) 27 | 28 | parser = youtube_dl.parseOpts()[0] 29 | build_completion(parser) 30 | -------------------------------------------------------------------------------- /youtube_dl/extractor/lci.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class LCIIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' 9 | _TEST = { 10 | 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html', 11 | 'md5': '2fdb2538b884d4d695f9bd2bde137e6c', 12 | 'info_dict': { 13 | 'id': '13244802', 14 | 'ext': 'mp4', 15 | 'title': 'Hillary Clinton et sa quinte de 
toux, en plein meeting', 16 | 'description': 'md5:a4363e3a960860132f8124b62f4a01c9', 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id') 24 | return self.url_result('wat:' + wat_id, 'Wat', wat_id) 25 | -------------------------------------------------------------------------------- /youtube_dl/extractor/m6.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class M6IE(InfoExtractor): 8 | IE_NAME = 'm6' 9 | _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' 10 | 11 | _TEST = { 12 | 'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html', 13 | 'md5': '242994a87de2c316891428e0176bcb77', 14 | 'info_dict': { 15 | 'id': '11323908', 16 | 'ext': 'mp4', 17 | 'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »', 18 | 'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2', 19 | 'duration': 100, 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | return self.url_result('6play:%s' % video_id, 'SixPlay', video_id) 26 | -------------------------------------------------------------------------------- /youtube_dl/extractor/anitube.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .nuevo import NuevoBaseIE 4 | 5 | 6 | class AnitubeIE(NuevoBaseIE): 7 | IE_NAME = 'anitube.se' 8 | _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' 9 | 10 | _TEST = { 11 | 'url': 'http://www.anitube.se/video/36621', 12 | 'md5': '59d0eeae28ea0bc8c05e7af429998d43', 13 | 'info_dict': { 14 | 'id': '36621', 15 | 'ext': 'mp4', 16 | 'title': 'Recorder to Randoseru 01', 17 | 'duration': 180.19, 18 | }, 19 | 'skip': 'Blocked in the US', 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | 25 | webpage = self._download_webpage(url, video_id) 26 | key = self._search_regex( 27 | r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') 28 | 29 | return self._extract_nuevo( 30 | 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id) 31 | -------------------------------------------------------------------------------- /youtube_dl/extractor/myvidster.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class MyVidsterIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/' 8 | 9 | _TEST = { 10 | 'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making', 11 | 'md5': '95296d0231c1363222c3441af62dc4ca', 12 | 'info_dict': { 13 | 'id': '3685814', 14 | 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', 15 | 'upload_date': '20141027', 16 | 'uploader': 'utkualp', 17 | 'ext': 'mp4', 18 | 'age_limit': 18, 19 | }, 20 | 'add_ie': ['XHamster'], 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | return self.url_result(self._html_search_regex( 28 | r'rel="videolink" href="(?P<real_url>.*)">', 29 | webpage, 
'real video url')) 30 | -------------------------------------------------------------------------------- /devscripts/gh-pages/sign-versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import unicode_literals, with_statement 3 | 4 | import rsa 5 | import json 6 | from binascii import hexlify 7 | 8 | try: 9 | input = raw_input 10 | except NameError: 11 | pass 12 | 13 | versions_info = json.load(open('update/versions.json')) 14 | if 'signature' in versions_info: 15 | del versions_info['signature'] 16 | 17 | print('Enter the PKCS1 private key, followed by a blank line:') 18 | privkey = b'' 19 | while True: 20 | try: 21 | line = input() 22 | except EOFError: 23 | break 24 | if line == '': 25 | break 26 | privkey += line.encode('ascii') + b'\n' 27 | privkey = rsa.PrivateKey.load_pkcs1(privkey) 28 | 29 | signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode() 30 | print('signature: ' + signature) 31 | 32 | versions_info['signature'] = signature 33 | with open('update/versions.json', 'w') as versionsf: 34 | json.dump(versions_info, versionsf, indent=4, sort_keys=True) 35 | -------------------------------------------------------------------------------- /youtube-dl.plugin.zsh: -------------------------------------------------------------------------------- 1 | # This allows the youtube-dl command to be installed in ZSH using antigen. 2 | # Antigen is a bundle manager. It allows you to enhance the functionality of 3 | # your zsh session by installing bundles and themes easily. 4 | 5 | # Antigen documentation: 6 | # http://antigen.sharats.me/ 7 | # https://github.com/zsh-users/antigen 8 | 9 | # Install youtube-dl: 10 | # antigen bundle rg3/youtube-dl 11 | # Bundles installed by antigen are available for use immediately. 12 | 13 | # Update youtube-dl (and all other antigen bundles): 14 | # antigen update 15 | 16 | # The antigen command will download the git repository to a folder and then 17 | # execute an enabling script (this file). The complete process for loading the 18 | # code is documented here: 19 | # https://github.com/zsh-users/antigen#notes-on-writing-plugins 20 | 21 | # This specific script just aliases youtube-dl to the python script that this 22 | # library provides. This requires updating the PYTHONPATH to ensure that the 23 | # full set of code can be located. 
24 | alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl" 25 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ku6.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class Ku6IE(InfoExtractor): 7 | _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' 8 | _TEST = { 9 | 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', 10 | 'md5': '01203549b9efbb45f4b87d55bdea1ed1', 11 | 'info_dict': { 12 | 'id': 'JG-8yS14xzBr4bCn1pu0xw', 13 | 'ext': 'f4v', 14 | 'title': 'techniques test', 15 | } 16 | } 17 | 18 | def _real_extract(self, url): 19 | video_id = self._match_id(url) 20 | webpage = self._download_webpage(url, video_id) 21 | 22 | title = self._html_search_regex( 23 | r'<h1 title=.*>(.*?)</h1>', webpage, 'title') 24 | dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id 25 | jsonData = self._download_json(dataUrl, video_id) 26 | downloadUrl = jsonData['data']['f'] 27 | 28 | return { 29 | 'id': video_id, 30 | 'title': title, 31 | 'url': downloadUrl 32 | } 33 | -------------------------------------------------------------------------------- /youtube_dl/postprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .embedthumbnail import EmbedThumbnailPP 4 | from .ffmpeg import ( 5 | FFmpegPostProcessor, 6 | FFmpegEmbedSubtitlePP, 7 | FFmpegExtractAudioPP, 8 | FFmpegFixupStretchedPP, 9 | FFmpegFixupM3u8PP, 10 | FFmpegFixupM4aPP, 11 | FFmpegMergerPP, 12 | FFmpegMetadataPP, 13 | FFmpegVideoConvertorPP, 14 | FFmpegSubtitlesConvertorPP, 15 | ) 16 | from .xattrpp import XAttrMetadataPP 17 | from .execafterdownload import ExecAfterDownloadPP 18 | from .metadatafromtitle import MetadataFromTitlePP 19 | 20 | 21 | def get_postprocessor(key): 22 | return globals()[key + 'PP'] 23 | 24 | 25 | __all__ = [ 26 | 'EmbedThumbnailPP', 27 | 'ExecAfterDownloadPP', 28 | 'FFmpegEmbedSubtitlePP', 29 | 'FFmpegExtractAudioPP', 30 | 'FFmpegFixupM3u8PP', 31 | 'FFmpegFixupM4aPP', 32 | 'FFmpegFixupStretchedPP', 33 | 'FFmpegMergerPP', 34 | 'FFmpegMetadataPP', 35 | 'FFmpegPostProcessor', 36 | 'FFmpegSubtitlesConvertorPP', 37 | 'FFmpegVideoConvertorPP', 38 | 'MetadataFromTitlePP', 39 | 'XAttrMetadataPP', 40 | ] 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/videodetective.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..compat import compat_urlparse 5 | from .internetvideoarchive import InternetVideoArchiveIE 6 | 7 | 8 | class VideoDetectiveIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' 10 | 11 | _TEST = { 12 | 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', 13 | 'info_dict': { 14 | 'id': '194487', 15 | 'ext': 'mp4', 16 | 'title': 'KICK-ASS 2', 17 | 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', 18 | }, 19 | 'params': { 20 | # m3u8 download 21 | 'skip_download': True, 22 | }, 23 | } 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | webpage = self._download_webpage(url, video_id) 28 | og_video = self._og_search_video_url(webpage) 29 | query = compat_urlparse.urlparse(og_video).query 
30 | return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) 31 | -------------------------------------------------------------------------------- /youtube_dl/extractor/cliprs.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .onet import OnetBaseIE 5 | 6 | 7 | class ClipRsIE(OnetBaseIE): 8 | _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+' 9 | _TEST = { 10 | 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', 11 | 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', 12 | 'info_dict': { 13 | 'id': '1488842.1399140381', 14 | 'ext': 'mp4', 15 | 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', 16 | 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', 17 | 'duration': 229, 18 | 'timestamp': 1459850243, 19 | 'upload_date': '20160405', 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | display_id = self._match_id(url) 25 | 26 | webpage = self._download_webpage(url, display_id) 27 | 28 | mvp_id = self._search_mvp_id(webpage) 29 | 30 | info_dict = self._extract_from_id(mvp_id, webpage) 31 | info_dict['display_id'] = display_id 32 | 33 | return info_dict 34 | -------------------------------------------------------------------------------- /youtube_dl/extractor/cbssports.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .cbs import CBSBaseIE 4 | 5 | 6 | class CBSSportsIE(CBSBaseIE): 7 | _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)' 8 | 9 | _TESTS = [{ 10 | 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', 11 | 'info_dict': { 12 | 'id': '708337219968', 13 | 'ext': 'mp4', 14 | 'title': 'Ben Simmons the next LeBron? 
Not so fast', 15 | 'description': 'md5:854294f627921baba1f4b9a990d87197', 16 | 'timestamp': 1466293740, 17 | 'upload_date': '20160618', 18 | 'uploader': 'CBSI-NEW', 19 | }, 20 | 'params': { 21 | # m3u8 download 22 | 'skip_download': True, 23 | } 24 | }] 25 | 26 | def _extract_video_info(self, filter_query, video_id): 27 | return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) 28 | 29 | def _real_extract(self, url): 30 | video_id = self._match_id(url) 31 | return self._extract_video_info('byId=%s' % video_id, video_id) 32 | -------------------------------------------------------------------------------- /devscripts/gh-pages/generate-download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import unicode_literals 3 | 4 | import hashlib 5 | import urllib.request 6 | import json 7 | 8 | versions_info = json.load(open('update/versions.json')) 9 | version = versions_info['latest'] 10 | URL = versions_info['versions'][version]['bin'][0] 11 | 12 | data = urllib.request.urlopen(URL).read() 13 | 14 | # Read template page 15 | with open('download.html.in', 'r', encoding='utf-8') as tmplf: 16 | template = tmplf.read() 17 | 18 | sha256sum = hashlib.sha256(data).hexdigest() 19 | template = template.replace('@PROGRAM_VERSION@', version) 20 | template = template.replace('@PROGRAM_URL@', URL) 21 | template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) 22 | template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) 23 | template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) 24 | template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0]) 25 | template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1]) 26 | with open('download.html', 'w', encoding='utf-8') as dlf: 27 | dlf.write(template) 28 | -------------------------------------------------------------------------------- /devscripts/gh-pages/update-sites.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import unicode_literals 3 | 4 | import sys 5 | import os 6 | import textwrap 7 | 8 | # We must be able to import youtube_dl 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 10 | 11 | import youtube_dl 12 | 13 | 14 | def main(): 15 | with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: 16 | template = tmplf.read() 17 | 18 | ie_htmls = [] 19 | for ie in youtube_dl.list_extractors(age_limit=None): 20 | ie_html = '<b>{}</b>'.format(ie.IE_NAME) 21 | ie_desc = getattr(ie, 'IE_DESC', None) 22 | if ie_desc is False: 23 | continue 24 | elif ie_desc is not None: 25 | ie_html += ': {}'.format(ie.IE_DESC) 26 | if not ie.working(): 27 | ie_html += ' (Currently broken)' 28 | ie_htmls.append('<li>{}</li>'.format(ie_html)) 29 | 30 | template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) 31 | 32 | with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: 33 | sitesf.write(template) 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Please follow the guide below 2 | 3 | - You will be asked some questions, please read them **carefully** and answer 
honestly 4 | - Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x]) 5 | - Use *Preview* tab to see how your *pull request* will actually look like 6 | 7 | --- 8 | 9 | ### Before submitting a *pull request* make sure you have: 10 | - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections 11 | - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests 12 | 13 | ### What is the purpose of your *pull request*? 14 | - [ ] Bug fix 15 | - [ ] New extractor 16 | - [ ] New feature 17 | 18 | --- 19 | 20 | ### Description of your *pull request* and other information 21 | 22 | Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible. 23 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ina.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class InaIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)' 11 | _TEST = { 12 | 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', 13 | 'md5': 'a667021bf2b41f8dc6049479d9bb38a3', 14 | 'info_dict': { 15 | 'id': 'I12055569', 16 | 'ext': 'mp4', 17 | 'title': 'François Hollande "Je crois que c\'est clair"', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | mobj = re.match(self._VALID_URL, url) 23 | 24 | video_id = mobj.group('id') 25 | mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id 26 | info_doc = self._download_xml(mrss_url, video_id) 27 | 28 | self.report_extraction(video_id) 29 | 30 | video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url'] 31 | 32 | return { 33 | 'id': video_id, 34 | 'url': video_url, 35 | 'title': info_doc.find('.//title').text, 36 | } 37 | -------------------------------------------------------------------------------- /test/test_update.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import unicode_literals 4 | 5 | # Allow direct execution 6 | import os 7 | import sys 8 | import unittest 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | 12 | import json 13 | from youtube_dl.update import rsa_verify 14 | 15 | 16 | class TestUpdate(unittest.TestCase): 17 | def test_rsa_verify(self): 18 | UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) 19 | with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'versions.json'), 'rb') as f: 20 | versions_info = f.read().decode() 21 | versions_info = json.loads(versions_info) 22 | signature = versions_info['signature'] 23 | del versions_info['signature'] 24 | self.assertTrue(rsa_verify( 25 | json.dumps(versions_info, 
sort_keys=True).encode('utf-8'), 26 | signature, UPDATES_RSA_KEY)) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ebaumsworld.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class EbaumsWorldIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)' 8 | 9 | _TEST = { 10 | 'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/', 11 | 'info_dict': { 12 | 'id': '83367677', 13 | 'ext': 'mp4', 14 | 'title': 'A Giant Python Opens The Door', 15 | 'description': 'This is how nightmares start...', 16 | 'uploader': 'jihadpizza', 17 | }, 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | config = self._download_xml( 23 | 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) 24 | video_url = config.find('file').text 25 | 26 | return { 27 | 'id': video_id, 28 | 'title': config.find('title').text, 29 | 'url': video_url, 30 | 'description': config.find('description').text, 31 | 'thumbnail': config.find('image').text, 32 | 'uploader': config.find('username').text, 33 | } 34 | -------------------------------------------------------------------------------- /youtube_dl/extractor/makertv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class MakerTVIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' 9 | _TEST = { 10 | 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 11 | 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', 12 | 'info_dict': { 13 | 'id': 'Fh3QgymL9gsc', 14 | 'ext': 'mp4', 15 | 'title': 'Maze Runner: The Scorch Trials Official Movie Review', 16 | 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a', 17 | 'upload_date': '20150918', 18 | 'timestamp': 1442549540, 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage(url, video_id) 25 | jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id') 26 | 27 | return { 28 | '_type': 'url_transparent', 29 | 'id': video_id, 30 | 'url': 'jwplatform:%s' % jwplatform_id, 31 | 'ie_key': 'JWPlatform', 32 | } 33 | -------------------------------------------------------------------------------- /youtube_dl/extractor/formula1.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class Formula1IE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' 9 | _TESTS = [{ 10 | 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', 11 | 'md5': '8c79e54be72078b26b89e0e111c0502b', 12 | 'info_dict': { 13 | 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 14 | 'ext': 'flv', 15 | 'title': 'Race highlights - Spain 2016', 16 | }, 17 | 'add_ie': ['Ooyala'], 18 | }, { 19 | 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', 20 | 'only_matching': True, 21 | }] 22 | 23 | def 
_real_extract(self, url): 24 | display_id = self._match_id(url) 25 | webpage = self._download_webpage(url, display_id) 26 | ooyala_embed_code = self._search_regex( 27 | r'data-videoid="([^"]+)"', webpage, 'ooyala embed code') 28 | return self.url_result( 29 | 'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) 30 | -------------------------------------------------------------------------------- /youtube_dl/extractor/teachingchannel.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .common import InfoExtractor 6 | from .ooyala import OoyalaIE 7 | 8 | 9 | class TeachingChannelIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' 11 | 12 | _TEST = { 13 | 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', 14 | 'md5': '3d6361864d7cac20b57c8784da17166f', 15 | 'info_dict': { 16 | 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 17 | 'ext': 'mp4', 18 | 'title': 'A History of Teaming', 19 | 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 20 | 'duration': 422.255, 21 | }, 22 | 'params': { 23 | 'skip_download': True, 24 | }, 25 | 'add_ie': ['Ooyala'], 26 | } 27 | 28 | def _real_extract(self, url): 29 | mobj = re.match(self._VALID_URL, url) 30 | title = mobj.group('title') 31 | webpage = self._download_webpage(url, title) 32 | ooyala_code = self._search_regex( 33 | r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code') 34 | 35 | return OoyalaIE._build_url_result(ooyala_code) 36 | -------------------------------------------------------------------------------- /youtube_dl/extractor/skysports.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class SkySportsIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)' 9 | _TEST = { 10 | 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', 11 | 'md5': 'c44a1db29f27daf9a0003e010af82100', 12 | 'info_dict': { 13 | 'id': '10328419', 14 | 'ext': 'flv', 15 | 'title': 'Bale: Its our time to shine', 16 | 'description': 'md5:9fd1de3614d525f5addda32ac3c482c9', 17 | }, 18 | 'add_ie': ['Ooyala'], 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | return { 26 | '_type': 'url_transparent', 27 | 'id': video_id, 28 | 'url': 'ooyala:%s' % self._search_regex( 29 | r'data-video-id="([^"]+)"', webpage, 'ooyala id'), 30 | 'title': self._og_search_title(webpage), 31 | 'description': self._og_search_description(webpage), 32 | 'ie_key': 'Ooyala', 33 | } 34 | -------------------------------------------------------------------------------- /test/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "consoletitle": false, 3 | "continuedl": true, 4 | "forcedescription": false, 5 | "forcefilename": false, 6 | "forceformat": false, 7 | "forcethumbnail": false, 8 | "forcetitle": false, 9 | "forceurl": false, 10 | "format": "best", 11 | "ignoreerrors": false, 12 | "listformats": null, 13 | "logtostderr": false, 14 | "matchtitle": null, 15 | "max_downloads": null, 16 | "nooverwrites": false, 17 | "nopart": false, 18 | "noprogress": false, 19 | "outtmpl": "%(id)s.%(ext)s", 20 | "password": null, 21 | "playlistend": -1, 22 | 
"playliststart": 1, 23 | "prefer_free_formats": false, 24 | "quiet": false, 25 | "ratelimit": null, 26 | "rejecttitle": null, 27 | "retries": 10, 28 | "simulate": false, 29 | "subtitleslang": null, 30 | "subtitlesformat": "best", 31 | "test": true, 32 | "updatetime": true, 33 | "usenetrc": false, 34 | "username": null, 35 | "verbose": true, 36 | "writedescription": false, 37 | "writeinfojson": true, 38 | "writesubtitles": false, 39 | "allsubtitles": false, 40 | "listssubtitles": false, 41 | "socket_timeout": 20, 42 | "fixup": "never" 43 | } 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/tutv.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import base64 4 | 5 | from .common import InfoExtractor 6 | from ..compat import compat_parse_qs 7 | 8 | 9 | class TutvIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' 11 | _TEST = { 12 | 'url': 'http://tu.tv/videos/robots-futbolistas', 13 | 'md5': '0cd9e28ad270488911b0d2a72323395d', 14 | 'info_dict': { 15 | 'id': '2973058', 16 | 'ext': 'mp4', 17 | 'title': 'Robots futbolistas', 18 | }, 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') 26 | 27 | data_content = self._download_webpage( 28 | 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') 29 | video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') 30 | 31 | return { 32 | 'id': internal_id, 33 | 'url': video_url, 34 | 'title': self._og_search_title(webpage), 35 | } 36 | -------------------------------------------------------------------------------- /youtube_dl/extractor/people.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class PeopleIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html' 9 | 10 | _TEST = { 11 | 'url': 'http://www.people.com/people/videos/0,,20995451,00.html', 12 | 'info_dict': { 13 | 'id': 'ref:20995451', 14 | 'ext': 'mp4', 15 | 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”', 16 | 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack', 17 | 'thumbnail': 're:^https?://.*\.jpg', 18 | 'duration': 246.318, 19 | 'timestamp': 1458720585, 20 | 'upload_date': '20160323', 21 | 'uploader_id': '416418724', 22 | }, 23 | 'params': { 24 | 'skip_download': True, 25 | }, 26 | 'add_ie': ['BrightcoveNew'], 27 | } 28 | 29 | def _real_extract(self, url): 30 | return self.url_result( 31 | 'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s' 32 | % self._match_id(url), 'BrightcoveNew') 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <http://unlicense.org/> 25 | -------------------------------------------------------------------------------- /youtube_dl/extractor/cnbc.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import smuggle_url 6 | 7 | 8 | class CNBCIE(InfoExtractor): 9 | _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' 10 | _TEST = { 11 | 'url': 'http://video.cnbc.com/gallery/?video=3000503714', 12 | 'info_dict': { 13 | 'id': '3000503714', 14 | 'ext': 'mp4', 15 | 'title': 'Fighting zombies is big business', 16 | 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', 17 | 'timestamp': 1459332000, 18 | 'upload_date': '20160330', 19 | 'uploader': 'NBCU-CNBC', 20 | }, 21 | 'params': { 22 | # m3u8 download 23 | 'skip_download': True, 24 | }, 25 | } 26 | 27 | def _real_extract(self, url): 28 | video_id = self._match_id(url) 29 | return { 30 | '_type': 'url_transparent', 31 | 'ie_key': 'ThePlatform', 32 | 'url': smuggle_url( 33 | 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, 34 | {'force_smil_url': True}), 35 | 'id': video_id, 36 | } 37 | -------------------------------------------------------------------------------- /youtube_dl/extractor/nerdcubed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import datetime 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class NerdCubedFeedIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' 11 | _TEST = { 12 | 'url': 'http://www.nerdcubed.co.uk/feed.json', 13 | 'info_dict': { 14 | 'id': 'nerdcubed-feed', 15 | 'title': 'nerdcubed.co.uk feed', 16 | }, 17 | 'playlist_mincount': 1300, 18 | } 19 | 20 | def _real_extract(self, url): 21 | feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') 22 | 23 | entries = [{ 24 | '_type': 'url', 25 | 'title': feed_entry['title'], 26 | 'uploader': feed_entry['source']['name'] if feed_entry['source'] else None, 27 | 'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'), 28 | 'url': 'http://www.youtube.com/watch?v=' + 
feed_entry['youtube_id'], 29 | } for feed_entry in feed] 30 | 31 | return { 32 | '_type': 'playlist', 33 | 'title': 'nerdcubed.co.uk feed', 34 | 'id': 'nerdcubed-feed', 35 | 'entries': entries, 36 | } 37 | -------------------------------------------------------------------------------- /youtube_dl/extractor/savefrom.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import os.path 5 | import re 6 | 7 | from .common import InfoExtractor 8 | 9 | 10 | class SaveFromIE(InfoExtractor): 11 | IE_NAME = 'savefrom.net' 12 | _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$' 13 | 14 | _TEST = { 15 | 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com', 16 | 'info_dict': { 17 | 'id': 'UlVRAPW2WJY', 18 | 'ext': 'mp4', 19 | 'title': 'About Team Radical MMA | MMA Fighting', 20 | 'upload_date': '20120816', 21 | 'uploader': 'Howcast', 22 | 'uploader_id': 'Howcast', 23 | 'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', 24 | }, 25 | 'params': { 26 | 'skip_download': True 27 | } 28 | } 29 | 30 | def _real_extract(self, url): 31 | mobj = re.match(self._VALID_URL, url) 32 | video_id = os.path.splitext(url.split('/')[-1])[0] 33 | return { 34 | '_type': 'url', 35 | 'id': video_id, 36 | 'url': mobj.group('url'), 37 | } 38 | -------------------------------------------------------------------------------- /youtube_dl/extractor/gameinformer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class GameInformerIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx' 9 | _TEST = { 10 | 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', 11 | 'md5': '292f26da1ab4beb4c9099f1304d2b071', 12 | 'info_dict': { 13 | 'id': '4515472681001', 14 | 'ext': 'mp4', 15 | 'title': 'Replay - Animal Crossing', 16 | 'description': 'md5:2e211891b215c85d061adc7a4dd2d930', 17 | 'timestamp': 1443457610, 18 | 'upload_date': '20150928', 19 | 'uploader_id': '694940074001', 20 | }, 21 | } 22 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' 23 | 24 | def _real_extract(self, url): 25 | display_id = self._match_id(url) 26 | webpage = self._download_webpage(url, display_id) 27 | brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id') 28 | return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) 29 | -------------------------------------------------------------------------------- /test/test_iqiyi_sdk_interpreter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import unicode_literals 4 | 5 | # Allow direct execution 6 | import os 7 | import sys 8 | import unittest 9 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | from test.helper import FakeYDL 12 | from youtube_dl.extractor import IqiyiIE 13 | 14 | 15 | class IqiyiIEWithCredentials(IqiyiIE): 16 | def _get_login_info(self): 17 | return 'foo', 'bar' 18 | 19 | 20 | class WarningLogger(object): 21 | def __init__(self): 22 | 
self.messages = [] 23 | 24 | def warning(self, msg): 25 | self.messages.append(msg) 26 | 27 | def debug(self, msg): 28 | pass 29 | 30 | def error(self, msg): 31 | pass 32 | 33 | 34 | class TestIqiyiSDKInterpreter(unittest.TestCase): 35 | def test_iqiyi_sdk_interpreter(self): 36 | ''' 37 | Test the functionality of IqiyiSDKInterpreter by trying to log in 38 | 39 | If `sign` is incorrect, /validate call throws an HTTP 556 error 40 | ''' 41 | logger = WarningLogger() 42 | ie = IqiyiIEWithCredentials(FakeYDL({'logger': logger})) 43 | ie._login() 44 | self.assertTrue('unable to log in:' in logger.messages[0]) 45 | 46 | if __name__ == '__main__': 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /devscripts/generate_aes_testdata.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import codecs 4 | import subprocess 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | from youtube_dl.utils import intlist_to_bytes 11 | from youtube_dl.aes import aes_encrypt, key_expansion 12 | 13 | secret_msg = b'Secret message goes here' 14 | 15 | 16 | def hex_str(int_list): 17 | return codecs.encode(intlist_to_bytes(int_list), 'hex') 18 | 19 | 20 | def openssl_encode(algo, key, iv): 21 | cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] 22 | prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) 23 | out, _ = prog.communicate(secret_msg) 24 | return out 25 | 26 | iv = key = [0x20, 0x15] + 14 * [0] 27 | 28 | r = openssl_encode('aes-128-cbc', key, iv) 29 | print('aes_cbc_decrypt') 30 | print(repr(r)) 31 | 32 | password = key 33 | new_key = aes_encrypt(password, key_expansion(password)) 34 | r = openssl_encode('aes-128-ctr', new_key, iv) 35 | print('aes_decrypt_text 16') 36 | print(repr(r)) 37 | 38 | password = key + 16 * [0] 39 | new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) 40 | r = openssl_encode('aes-256-ctr', new_key, iv) 41 | print('aes_decrypt_text 32') 42 | print(repr(r)) 43 | -------------------------------------------------------------------------------- /youtube_dl/extractor/learnr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class LearnrIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)' 9 | _TEST = { 10 | 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', 11 | 'md5': '3719fdf0a68397f49899e82c308a89de', 12 | 'info_dict': { 13 | 'id': '51624', 14 | 'ext': 'mp4', 15 | 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', 16 | 'description': 'md5:b36dbfa92350176cdf12b4d388485503', 17 | 'uploader': 'LearnCode.academy', 18 | 'uploader_id': 'learncodeacademy', 19 | 'upload_date': '20131021', 20 | }, 21 | 'add_ie': ['Youtube'], 22 | } 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | webpage = self._download_webpage(url, video_id) 27 | 28 | return { 29 | '_type': 'url_transparent', 30 | 'url': self._search_regex( 31 | r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), 32 | 'id': video_id, 33 | } 34 | 
-------------------------------------------------------------------------------- /youtube_dl/extractor/commonprotocols.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import os 4 | 5 | from .common import InfoExtractor 6 | from ..compat import ( 7 | compat_urllib_parse_unquote, 8 | compat_urlparse, 9 | ) 10 | from ..utils import url_basename 11 | 12 | 13 | class RtmpIE(InfoExtractor): 14 | IE_DESC = False # Do not list 15 | _VALID_URL = r'(?i)rtmp[est]?://.+' 16 | 17 | _TESTS = [{ 18 | 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4', 19 | 'only_matching': True, 20 | }, { 21 | 'url': 'rtmp://edge.live.hitbox.tv/live/dimak', 22 | 'only_matching': True, 23 | }] 24 | 25 | def _real_extract(self, url): 26 | video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) 27 | title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) 28 | return { 29 | 'id': video_id, 30 | 'title': title, 31 | 'formats': [{ 32 | 'url': url, 33 | 'ext': 'flv', 34 | 'format_id': compat_urlparse.urlparse(url).scheme, 35 | }], 36 | } 37 | -------------------------------------------------------------------------------- /youtube_dl/extractor/fusion.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from .ooyala import OoyalaIE 5 | 6 | 7 | class FusionIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)' 9 | _TESTS = [{ 10 | 'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 11 | 'info_dict': { 12 | 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', 13 | 'ext': 'mp4', 14 | 'title': 'U.S. 
and Panamanian forces work together to stop a vessel smuggling drugs', 15 | 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 16 | 'duration': 140.0, 17 | }, 18 | 'params': { 19 | 'skip_download': True, 20 | }, 21 | 'add_ie': ['Ooyala'], 22 | }, { 23 | 'url': 'http://fusion.net/video/201781', 24 | 'only_matching': True, 25 | }] 26 | 27 | def _real_extract(self, url): 28 | display_id = self._match_id(url) 29 | webpage = self._download_webpage(url, display_id) 30 | 31 | ooyala_code = self._search_regex( 32 | r'data-video-id=(["\'])(?P<code>.+?)\1', 33 | webpage, 'ooyala code', group='code') 34 | 35 | return OoyalaIE._build_url_result(ooyala_code) 36 | -------------------------------------------------------------------------------- /youtube_dl/extractor/nuevo.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | from ..utils import ( 7 | float_or_none, 8 | xpath_text 9 | ) 10 | 11 | 12 | class NuevoBaseIE(InfoExtractor): 13 | def _extract_nuevo(self, config_url, video_id): 14 | config = self._download_xml( 15 | config_url, video_id, transform_source=lambda s: s.strip()) 16 | 17 | title = xpath_text(config, './title', 'title', fatal=True).strip() 18 | video_id = xpath_text(config, './mediaid', default=video_id) 19 | thumbnail = xpath_text(config, ['./image', './thumb']) 20 | duration = float_or_none(xpath_text(config, './duration')) 21 | 22 | formats = [] 23 | for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): 24 | video_url = xpath_text(config, element_name) 25 | if video_url: 26 | formats.append({ 27 | 'url': video_url, 28 | 'format_id': format_id, 29 | }) 30 | self._check_formats(formats, video_id) 31 | 32 | return { 33 | 'id': video_id, 34 | 'title': title, 35 | 'thumbnail': thumbnail, 36 | 'duration': duration, 37 | 'formats': formats 38 | } 39 | -------------------------------------------------------------------------------- /youtube_dl/extractor/lovehomeporn.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .nuevo import NuevoBaseIE 6 | 7 | 8 | class LoveHomePornIE(NuevoBaseIE): 9 | _VALID_URL = r'https?://(?:www\.)?lovehomeporn\.com/video/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' 
10 | _TEST = { 11 | 'url': 'http://lovehomeporn.com/video/48483/stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick#menu', 12 | 'info_dict': { 13 | 'id': '48483', 14 | 'display_id': 'stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick', 15 | 'ext': 'mp4', 16 | 'title': 'Stunning busty brunette girlfriend sucking and riding a big dick', 17 | 'age_limit': 18, 18 | 'duration': 238.47, 19 | }, 20 | 'params': { 21 | 'skip_download': True, 22 | } 23 | } 24 | 25 | def _real_extract(self, url): 26 | mobj = re.match(self._VALID_URL, url) 27 | video_id = mobj.group('id') 28 | display_id = mobj.group('display_id') 29 | 30 | info = self._extract_nuevo( 31 | 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id, 32 | video_id) 33 | info.update({ 34 | 'display_id': display_id, 35 | 'age_limit': 18 36 | }) 37 | return info 38 | -------------------------------------------------------------------------------- /devscripts/make_supportedsites.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import io 5 | import optparse 6 | import os 7 | import sys 8 | 9 | 10 | # Import youtube_dl 11 | ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') 12 | sys.path.insert(0, ROOT_DIR) 13 | import youtube_dl 14 | 15 | 16 | def main(): 17 | parser = optparse.OptionParser(usage='%prog OUTFILE.md') 18 | options, args = parser.parse_args() 19 | if len(args) != 1: 20 | parser.error('Expected an output filename') 21 | 22 | outfile, = args 23 | 24 | def gen_ies_md(ies): 25 | for ie in ies: 26 | ie_md = '**{0}**'.format(ie.IE_NAME) 27 | ie_desc = getattr(ie, 'IE_DESC', None) 28 | if ie_desc is False: 29 | continue 30 | if ie_desc is not None: 31 | ie_md += ': {0}'.format(ie.IE_DESC) 32 | if not ie.working(): 33 | ie_md += ' (Currently broken)' 34 | yield ie_md 35 | 36 | ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()) 37 | out = '# Supported sites\n' + ''.join( 38 | ' - ' + md + '\n' 39 | for md in gen_ies_md(ies)) 40 | 41 | with io.open(outfile, 'w', encoding='utf-8') as outf: 42 | outf.write(out) 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /youtube_dl/extractor/gputechconf.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class GPUTechConfIE(InfoExtractor): 8 | _VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html' 9 | _TEST = { 10 | 'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html', 11 | 'md5': 'a8862a00a0fd65b8b43acc5b8e33f798', 12 | 'info_dict': { 13 | 'id': '5156', 14 | 'ext': 'mp4', 15 | 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis', 16 | 'duration': 1219, 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | 24 | root_path = self._search_regex( 25 | r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 26 | default='http://evt.dispeak.com/nvidia/events/gtc15/') 27 | xml_file_id = self._search_regex( 28 | r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id') 29 | 30 | return { 31 | '_type': 'url_transparent', 32 | 'id': video_id, 33 | 'url': '%sxml/%s.xml' % (root_path, xml_file_id), 34 | 'ie_key': 'DigitallySpeaking', 
35 | } 36 | -------------------------------------------------------------------------------- /youtube_dl/extractor/freevideo.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import ExtractorError 5 | 6 | 7 | class FreeVideoIE(InfoExtractor): 8 | _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])' 9 | 10 | _TEST = { 11 | 'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html', 12 | 'info_dict': { 13 | 'id': 'vysukany-zadecek-22033', 14 | 'ext': 'mp4', 15 | 'title': 'vysukany-zadecek-22033', 16 | 'age_limit': 18, 17 | }, 18 | 'skip': 'Blocked outside .cz', 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage, handle = self._download_webpage_handle(url, video_id) 24 | if '//www.czechav.com/' in handle.geturl(): 25 | raise ExtractorError( 26 | 'Access to freevideo is blocked from your location', 27 | expected=True) 28 | 29 | video_url = self._search_regex( 30 | r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"', 31 | webpage, 'video URL') 32 | 33 | return { 34 | 'id': video_id, 35 | 'url': video_url, 36 | 'title': video_id, 37 | 'age_limit': 18, 38 | } 39 | -------------------------------------------------------------------------------- /devscripts/gh-pages/add-version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import unicode_literals 3 | 4 | import json 5 | import sys 6 | import hashlib 7 | import os.path 8 | 9 | 10 | if len(sys.argv) <= 1: 11 | print('Specify the version number as parameter') 12 | sys.exit() 13 | version = sys.argv[1] 14 | 15 | with open('update/LATEST_VERSION', 'w') as f: 16 | f.write(version) 17 | 18 | versions_info = json.load(open('update/versions.json')) 19 | if 'signature' in versions_info: 20 | del versions_info['signature'] 21 | 22 | new_version = {} 23 | 24 | filenames = { 25 | 'bin': 'youtube-dl', 26 | 'exe': 'youtube-dl.exe', 27 | 'tar': 'youtube-dl-%s.tar.gz' % version} 28 | build_dir = os.path.join('..', '..', 'build', version) 29 | for key, filename in filenames.items(): 30 | url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) 31 | fn = os.path.join(build_dir, filename) 32 | with open(fn, 'rb') as f: 33 | data = f.read() 34 | if not data: 35 | raise ValueError('File %s is empty!' 
% fn) 36 | sha256sum = hashlib.sha256(data).hexdigest() 37 | new_version[key] = (url, sha256sum) 38 | 39 | versions_info['versions'][version] = new_version 40 | versions_info['latest'] = version 41 | 42 | with open('update/versions.json', 'w') as jsonf: 43 | json.dump(versions_info, jsonf, indent=4, sort_keys=True) 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/restudy.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class RestudyIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)' 9 | _TEST = { 10 | 'url': 'https://www.restudy.dk/video/play/id/1637', 11 | 'info_dict': { 12 | 'id': '1637', 13 | 'ext': 'flv', 14 | 'title': 'Leiden-frosteffekt', 15 | 'description': 'Denne video er et eksperiment med flydende kvælstof.', 16 | }, 17 | 'params': { 18 | # rtmp download 19 | 'skip_download': True, 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | 26 | webpage = self._download_webpage(url, video_id) 27 | 28 | title = self._og_search_title(webpage).strip() 29 | description = self._og_search_description(webpage).strip() 30 | 31 | formats = self._extract_smil_formats( 32 | 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, 33 | video_id) 34 | self._sort_formats(formats) 35 | 36 | return { 37 | 'id': video_id, 38 | 'title': title, 39 | 'description': description, 40 | 'formats': formats, 41 | } 42 | -------------------------------------------------------------------------------- /youtube_dl/extractor/hentaistigma.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class HentaiStigmaIE(InfoExtractor): 7 | _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)' 8 | _TEST = { 9 | 'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/', 10 | 'md5': '4e3d07422a68a4cc363d8f57c8bf0d23', 11 | 'info_dict': { 12 | 'id': 'inyouchuu-etsu-bonus', 13 | 'ext': 'mp4', 14 | 'title': 'Inyouchuu Etsu Bonus', 15 | 'age_limit': 18, 16 | } 17 | } 18 | 19 | def _real_extract(self, url): 20 | video_id = self._match_id(url) 21 | 22 | webpage = self._download_webpage(url, video_id) 23 | 24 | title = self._html_search_regex( 25 | r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>', 26 | webpage, 'title') 27 | wrap_url = self._html_search_regex( 28 | r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url') 29 | wrap_webpage = self._download_webpage(wrap_url, video_id) 30 | 31 | video_url = self._html_search_regex( 32 | r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url') 33 | 34 | return { 35 | 'id': video_id, 36 | 'url': video_url, 37 | 'title': title, 38 | 'age_limit': 18, 39 | } 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/tvland.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .mtv import MTVServicesInfoExtractor 5 | 6 | 7 | class TVLandIE(MTVServicesInfoExtractor): 8 | IE_NAME = 'tvland.com' 9 | _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' 10 | _FEED_URL = 'http://www.tvland.com/feeds/mrss/' 11 | _TESTS = [{ 12 | # Geo-restricted. 
Without a proxy metadata are still there. With a 13 | # proxy it redirects to http://m.tvland.com/app/ 14 | 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', 15 | 'info_dict': { 16 | 'description': 'md5:80973e81b916a324e05c14a3fb506d29', 17 | 'title': 'The Invasion', 18 | }, 19 | 'playlist': [], 20 | }, { 21 | 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', 22 | 'md5': 'e2c6389401cf485df26c79c247b08713', 23 | 'info_dict': { 24 | 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', 25 | 'ext': 'mp4', 26 | 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', 27 | 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', 28 | 'upload_date': '20151228', 29 | 'timestamp': 1451289600, 30 | }, 31 | }] 32 | -------------------------------------------------------------------------------- /youtube_dl/extractor/freespeech.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | import json 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class FreespeechIE(InfoExtractor): 10 | IE_NAME = 'freespeech.org' 11 | _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)' 12 | _TEST = { 13 | 'add_ie': ['Youtube'], 14 | 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', 15 | 'info_dict': { 16 | 'id': 'poKsVCZ64uU', 17 | 'ext': 'webm', 18 | 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', 19 | 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', 20 | 'uploader': 'freespeechtv', 21 | 'uploader_id': 'freespeechtv', 22 | 'upload_date': '20121002', 23 | }, 24 | } 25 | 26 | def _real_extract(self, url): 27 | mobj = re.match(self._VALID_URL, url) 28 | title = mobj.group('title') 29 | webpage = self._download_webpage(url, title) 30 | info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') 31 | info = json.loads(info_json) 32 | 33 | return { 34 | '_type': 'url', 35 | 'url': info['jw_player']['basic_video_node_player']['file'], 36 | 'ie_key': 'Youtube', 37 | } 38 | -------------------------------------------------------------------------------- /youtube_dl/extractor/sonyliv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class SonyLIVIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)' 9 | _TESTS = [{ 10 | 'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", 11 | 'info_dict': { 12 | 'title': "Ep. 
1 - Achaari Cheese Toast - Bachelor's Delight", 13 | 'id': '5024612095001', 14 | 'ext': 'mp4', 15 | 'upload_date': '20160707', 16 | 'description': 'md5:7f28509a148d5be9d0782b4d5106410d', 17 | 'uploader_id': '4338955589001', 18 | 'timestamp': 1467870968, 19 | }, 20 | 'params': { 21 | 'skip_download': True, 22 | }, 23 | 'add_ie': ['BrightcoveNew'], 24 | }, { 25 | 'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)', 26 | 'only_matching': True, 27 | }] 28 | 29 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' 30 | 31 | def _real_extract(self, url): 32 | brightcove_id = self._match_id(url) 33 | return self.url_result( 34 | self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) 35 | -------------------------------------------------------------------------------- /youtube_dl/extractor/defense.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class DefenseGouvFrIE(InfoExtractor): 7 | IE_NAME = 'defense.gouv.fr' 8 | _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' 9 | 10 | _TEST = { 11 | 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', 12 | 'md5': '75bba6124da7e63d2d60b5244ec9430c', 13 | 'info_dict': { 14 | 'id': '11213', 15 | 'ext': 'mp4', 16 | 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1' 17 | } 18 | } 19 | 20 | def _real_extract(self, url): 21 | title = self._match_id(url) 22 | webpage = self._download_webpage(url, title) 23 | 24 | video_id = self._search_regex( 25 | r"flashvars.pvg_id=\"(\d+)\";", 26 | webpage, 'ID') 27 | 28 | json_url = ( 29 | 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % 30 | video_id) 31 | info = self._download_json(json_url, title, 'Downloading JSON config') 32 | video_url = info['renditions'][0]['url'] 33 | 34 | return { 35 | 'id': video_id, 36 | 'ext': 'mp4', 37 | 'url': video_url, 38 | 'title': title, 39 | } 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/rottentomatoes.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from .internetvideoarchive import InternetVideoArchiveIE 5 | 6 | 7 | class RottenTomatoesIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' 9 | 10 | _TEST = { 11 | 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 12 | 'info_dict': { 13 | 'id': '11028566', 14 | 'ext': 'mp4', 15 | 'title': 'Toy Story 3', 16 | 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', 17 | 'thumbnail': 're:^https?://.*\.jpg$', 18 | }, 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') 25 | 26 | return { 27 | '_type': 'url_transparent', 28 | 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, 29 | 'ie_key': InternetVideoArchiveIE.ie_key(), 30 | 'id': video_id, 31 
| 'title': self._og_search_title(webpage), 32 | } 33 | -------------------------------------------------------------------------------- /youtube_dl/extractor/youjizz.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class YouJizzIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' 8 | _TESTS = [{ 9 | 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 10 | 'md5': '78fc1901148284c69af12640e01c6310', 11 | 'info_dict': { 12 | 'id': '2189178', 13 | 'ext': 'mp4', 14 | 'title': 'Zeichentrick 1', 15 | 'age_limit': 18, 16 | } 17 | }, { 18 | 'url': 'http://www.youjizz.com/videos/-2189178.html', 19 | 'only_matching': True, 20 | }] 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage(url, video_id) 25 | # YouJizz's HTML5 player has invalid HTML 26 | webpage = webpage.replace('"controls', '" controls') 27 | age_limit = self._rta_search(webpage) 28 | video_title = self._html_search_regex( 29 | r'<title>\s*(.*)\s*', webpage, 'title') 30 | 31 | info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] 32 | 33 | info_dict.update({ 34 | 'id': video_id, 35 | 'title': video_title, 36 | 'age_limit': age_limit, 37 | }) 38 | 39 | return info_dict 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/aljazeera.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class AlJazeeraIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' 8 | 9 | _TEST = { 10 | 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', 11 | 'info_dict': { 12 | 'id': '3792260579001', 13 | 'ext': 'mp4', 14 | 'title': 'The Slum - Episode 1: Deliverance', 15 | 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', 16 | 'uploader_id': '665003303001', 17 | 'timestamp': 1411116829, 18 | 'upload_date': '20140919', 19 | }, 20 | 'add_ie': ['BrightcoveNew'], 21 | 'skip': 'Not accessible from Travis CI server', 22 | } 23 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s' 24 | 25 | def _real_extract(self, url): 26 | program_name = self._match_id(url) 27 | webpage = self._download_webpage(url, program_name) 28 | brightcove_id = self._search_regex( 29 | r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') 30 | return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) 31 | -------------------------------------------------------------------------------- /youtube_dl/extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | try: 4 | from .lazy_extractors import * 5 | from .lazy_extractors import _ALL_CLASSES 6 | _LAZY_LOADER = True 7 | except ImportError: 8 | _LAZY_LOADER = False 9 | from .extractors import * 10 | 11 | _ALL_CLASSES = [ 12 | klass 13 | for name, klass in globals().items() 14 | if name.endswith('IE') and name != 'GenericIE' 15 | ] 16 | _ALL_CLASSES.append(GenericIE) 17 | 18 | 19 | def 
gen_extractor_classes(): 20 | """ Return a list of supported extractors. 21 | The order does matter; the first extractor matched is the one handling the URL. 22 | """ 23 | return _ALL_CLASSES 24 | 25 | 26 | def gen_extractors(): 27 | """ Return a list of an instance of every supported extractor. 28 | The order does matter; the first extractor matched is the one handling the URL. 29 | """ 30 | return [klass() for klass in gen_extractor_classes()] 31 | 32 | 33 | def list_extractors(age_limit): 34 | """ 35 | Return a list of extractors that are suitable for the given age, 36 | sorted by extractor ID. 37 | """ 38 | 39 | return sorted( 40 | filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), 41 | key=lambda ie: ie.IE_NAME.lower()) 42 | 43 | 44 | def get_info_extractor(ie_name): 45 | """Returns the info extractor class with the given ie_name""" 46 | return globals()[ie_name + 'IE'] 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/criterion.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class CriterionIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P[0-9]+)-.+' 11 | _TEST = { 12 | 'url': 'http://www.criterion.com/films/184-le-samourai', 13 | 'md5': 'bc51beba55685509883a9a7830919ec3', 14 | 'info_dict': { 15 | 'id': '184', 16 | 'ext': 'mp4', 17 | 'title': 'Le Samouraï', 18 | 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | mobj = re.match(self._VALID_URL, url) 24 | video_id = mobj.group('id') 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | final_url = self._search_regex( 28 | r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') 29 | title = self._og_search_title(webpage) 30 | description = self._html_search_meta('description', webpage) 31 | thumbnail = self._search_regex( 32 | r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', 33 | webpage, 'thumbnail url') 34 | 35 | return { 36 | 'id': video_id, 37 | 'url': final_url, 38 | 'title': title, 39 | 'description': description, 40 | 'thumbnail': thumbnail, 41 | } 42 | -------------------------------------------------------------------------------- /youtube_dl/extractor/macgamestore.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import ExtractorError 5 | 6 | 7 | class MacGameStoreIE(InfoExtractor): 8 | IE_NAME = 'macgamestore' 9 | IE_DESC = 'MacGameStore trailers' 10 | _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P\d+)' 11 | 12 | _TEST = { 13 | 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', 14 | 'md5': '8649b8ea684b6666b4c5be736ecddc61', 15 | 'info_dict': { 16 | 'id': '2450', 17 | 'ext': 'm4v', 18 | 'title': 'Crow', 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage( 25 | url, video_id, 'Downloading trailer page') 26 | 27 | if '>Missing Media<' in webpage: 28 | raise ExtractorError( 29 | 'Trailer %s does not exist' % video_id, expected=True) 30 | 31 | video_title = self._html_search_regex( 32 | r'MacGameStore: (.*?) 
Trailer', webpage, 'title') 33 | 34 | video_url = self._html_search_regex( 35 | r'(?s)', 36 | webpage, 'video URL') 37 | 38 | return { 39 | 'id': video_id, 40 | 'url': video_url, 41 | 'title': video_title 42 | } 43 | -------------------------------------------------------------------------------- /youtube_dl/extractor/dropbox.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import os.path 5 | import re 6 | 7 | from .common import InfoExtractor 8 | from ..compat import compat_urllib_parse_unquote 9 | from ..utils import url_basename 10 | 11 | 12 | class DropboxIE(InfoExtractor): 13 | _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P[a-zA-Z0-9]{15})/.*' 14 | _TESTS = [ 15 | { 16 | 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', 17 | 'info_dict': { 18 | 'id': 'nelirfsxnmcfbfh', 19 | 'ext': 'mp4', 20 | 'title': 'youtube-dl test video \'ä"BaW_jenozKc' 21 | } 22 | }, { 23 | 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', 24 | 'only_matching': True, 25 | }, 26 | ] 27 | 28 | def _real_extract(self, url): 29 | mobj = re.match(self._VALID_URL, url) 30 | video_id = mobj.group('id') 31 | fn = compat_urllib_parse_unquote(url_basename(url)) 32 | title = os.path.splitext(fn)[0] 33 | video_url = re.sub(r'[?&]dl=0', '', url) 34 | video_url += ('?' if '?' not in video_url else '&') + 'dl=1' 35 | 36 | return { 37 | 'id': video_id, 38 | 'title': title, 39 | 'url': video_url, 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/keek.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class KeekIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P\w+)' 9 | IE_NAME = 'keek' 10 | _TEST = { 11 | 'url': 'https://www.keek.com/keek/NODfbab', 12 | 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', 13 | 'info_dict': { 14 | 'id': 'NODfbab', 15 | 'ext': 'mp4', 16 | 'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', 17 | 'uploader': 'ytdl', 18 | 'uploader_id': 'eGT5bab', 19 | }, 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | return { 28 | 'id': video_id, 29 | 'url': self._og_search_video_url(webpage), 30 | 'ext': 'mp4', 31 | 'title': self._og_search_description(webpage).strip(), 32 | 'thumbnail': self._og_search_thumbnail(webpage), 33 | 'uploader': self._search_regex( 34 | r'data-username=(["\'])(?P.+?)\1', webpage, 35 | 'uploader', fatal=False, group='uploader'), 36 | 'uploader_id': self._search_regex( 37 | r'data-user-id=(["\'])(?P.+?)\1', webpage, 38 | 'uploader id', fatal=False, group='uploader_id'), 39 | } 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/dreisat.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .zdf import ZDFIE 6 | 7 | 8 | class DreiSatIE(ZDFIE): 9 | IE_NAME = '3sat' 10 | _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P[0-9]+)$' 11 | _TESTS = [ 12 | { 13 | 'url': 
'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 14 | 'md5': 'be37228896d30a88f315b638900a026e', 15 | 'info_dict': { 16 | 'id': '45918', 17 | 'ext': 'mp4', 18 | 'title': 'Waidmannsheil', 19 | 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 20 | 'uploader': 'SCHWEIZWEIT', 21 | 'uploader_id': '100000210', 22 | 'upload_date': '20140913' 23 | }, 24 | 'params': { 25 | 'skip_download': True, # m3u8 downloads 26 | } 27 | }, 28 | { 29 | 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', 30 | 'only_matching': True, 31 | }, 32 | ] 33 | 34 | def _real_extract(self, url): 35 | mobj = re.match(self._VALID_URL, url) 36 | video_id = mobj.group('id') 37 | details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id 38 | return self.extract_from_xml_url(video_id, details_url) 39 | -------------------------------------------------------------------------------- /youtube_dl/extractor/hark.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class HarkIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P.+?)-.+' 9 | _TEST = { 10 | 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', 11 | 'md5': '6783a58491b47b92c7c1af5a77d4cbee', 12 | 'info_dict': { 13 | 'id': 'mmbzyhkgny', 14 | 'ext': 'mp3', 15 | 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013', 16 | 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', 17 | 'duration': 11, 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | data = self._download_json( 24 | 'http://www.hark.com/clips/%s.json' % video_id, video_id) 25 | 26 | return { 27 | 'id': video_id, 28 | 'url': data['url'], 29 | 'title': data['name'], 30 | 'description': data.get('description'), 31 | 'thumbnail': data.get('image_original'), 32 | 'duration': data.get('duration'), 33 | } 34 | -------------------------------------------------------------------------------- /youtube_dl/extractor/foxsports.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import ( 5 | smuggle_url, 6 | update_url_query, 7 | ) 8 | 9 | 10 | class FoxSportsIE(InfoExtractor): 11 | _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P[^/]+)' 12 | 13 | _TEST = { 14 | 'url': 'http://www.foxsports.com/video?vid=432609859715', 15 | 'md5': 'b49050e955bebe32c301972e4012ac17', 16 | 'info_dict': { 17 | 'id': 'i0qKWsk3qJaM', 18 | 'ext': 'mp4', 19 | 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', 20 | 'description': 'Courtney Lee talks about Memphis being focused.', 21 | 'upload_date': '20150423', 22 | 'timestamp': 1429761109, 23 | 'uploader': 'NEWA-FNG-FOXSPORTS', 24 | }, 25 | 'add_ie': ['ThePlatform'], 26 | } 27 | 28 | def _real_extract(self, url): 29 | video_id = self._match_id(url) 30 | 31 | webpage = self._download_webpage(url, video_id) 32 | 33 | config = self._parse_json( 34 | self._search_regex( 35 | r"data-player-config='([^']+)'", webpage, 'data player config'), 36 | video_id) 37 | 38 | return self.url_result(smuggle_url(update_url_query( 39 | config['releaseURL'], { 40 | 'mbr': 'true', 41 | 'switch': 'http', 42 | }), {'force_smil_url': True})) 43 | -------------------------------------------------------------------------------- /test/test_execution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import unittest 7 | 8 | import sys 9 | import os 10 | import subprocess 11 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 12 | 13 | from youtube_dl.utils import encodeArgument 14 | 15 | rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | 17 | 18 | try: 19 | _DEV_NULL = subprocess.DEVNULL 20 | except AttributeError: 21 | _DEV_NULL = open(os.devnull, 'wb') 22 | 23 | 24 | class TestExecution(unittest.TestCase): 25 | def test_import(self): 26 | subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) 27 | 28 | def test_module_exec(self): 29 | if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution 30 | subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) 31 | 32 | def test_main_exec(self): 33 | subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) 34 | 35 | def test_cmdline_umlauts(self): 36 | p = subprocess.Popen( 37 | [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], 38 | cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) 39 | _, stderr = p.communicate() 40 | self.assertFalse(stderr) 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/echomsk.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class EchoMskIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P\d+)' 11 | _TEST = { 12 | 'url': 'http://www.echo.msk.ru/sounds/1464134.html', 13 | 'md5': '2e44b3b78daff5b458e4dbc37f191f7c', 14 | 'info_dict': { 15 | 'id': '1464134', 16 | 'ext': 'mp3', 17 | 'title': 'Особое мнение - 29 декабря 2014, 19:08', 18 | }, 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage(url, video_id) 25 | 26 | audio_url = self._search_regex( 27 | r'', webpage, 'audio URL') 28 | 29 | title = self._html_search_regex( 30 | r'([^<]+)', 31 | webpage, 'title') 32 | 33 | air_date = self._html_search_regex( 34 | r'(?s)
<div class="date">(.+?)</div>
', 35 | webpage, 'date', fatal=False, default=None) 36 | 37 | if air_date: 38 | air_date = re.sub(r'(\s)\1+', r'\1', air_date) 39 | if air_date: 40 | title = '%s - %s' % (title, air_date) 41 | 42 | return { 43 | 'id': video_id, 44 | 'url': audio_url, 45 | 'title': title, 46 | } 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/telequebec.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import int_or_none 6 | 7 | 8 | class TeleQuebecIE(InfoExtractor): 9 | _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' 10 | _TEST = { 11 | 'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york', 12 | 'md5': 'fe95a0957e5707b1b01f5013e725c90f', 13 | 'info_dict': { 14 | 'id': '20984', 15 | 'ext': 'mp4', 16 | 'title': 'Le couronnement de New York', 17 | 'description': 'md5:f5b3d27a689ec6c1486132b2d687d432', 18 | 'upload_date': '20160220', 19 | 'timestamp': 1455965438, 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | media_id = self._match_id(url) 25 | media_data = self._download_json( 26 | 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, 27 | media_id)['media'] 28 | return { 29 | '_type': 'url_transparent', 30 | 'id': media_id, 31 | 'url': 'limelight:media:' + media_data['streamInfo']['sourceId'], 32 | 'title': media_data['title'], 33 | 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), 34 | 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), 35 | 'ie_key': 'LimelightMedia', 36 | } 37 | -------------------------------------------------------------------------------- /youtube_dl/extractor/lemonde.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class LemondeIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P[^/]+)\.html' 8 | _TESTS = [{ 9 | 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html', 10 | 'md5': '01fb3c92de4c12c573343d63e163d302', 11 | 'info_dict': { 12 | 'id': 'lqm3kl', 13 | 'ext': 'mp4', 14 | 'title': "Comprendre l'affaire Bygmalion en 5 minutes", 15 | 'thumbnail': 're:^https?://.*\.jpg', 16 | 'duration': 320, 17 | 'upload_date': '20160119', 18 | 'timestamp': 1453194778, 19 | 'uploader_id': '3pmkp', 20 | }, 21 | }, { 22 | 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html', 23 | 'only_matching': True, 24 | }] 25 | 26 | def _real_extract(self, url): 27 | display_id = self._match_id(url) 28 | 29 | webpage = self._download_webpage(url, display_id) 30 | 31 | digiteka_url = self._proto_relative_url(self._search_regex( 32 | r'url\s*:\s*(["\'])(?P(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1', 33 | webpage, 'digiteka url', group='url')) 34 | return self.url_result(digiteka_url, 'Digiteka') 35 | -------------------------------------------------------------------------------- /youtube_dl/extractor/spike.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .mtv import MTVServicesInfoExtractor 4 | 5 | 6 | class 
SpikeIE(MTVServicesInfoExtractor): 7 | _VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' 8 | _TESTS = [{ 9 | 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 10 | 'md5': '1a9265f32b0c375793d6c4ce45255256', 11 | 'info_dict': { 12 | 'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba', 13 | 'ext': 'mp4', 14 | 'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?', 15 | 'description': 'md5:fbed7e82ed5fad493615b3094a9499cb', 16 | 'timestamp': 1388120400, 17 | 'upload_date': '20131227', 18 | }, 19 | }, { 20 | 'url': 'http://www.spike.com/video-clips/lhtu8m/', 21 | 'only_matching': True, 22 | }, { 23 | 'url': 'http://www.spike.com/video-clips/lhtu8m', 24 | 'only_matching': True, 25 | }, { 26 | 'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', 27 | 'only_matching': True, 28 | }, { 29 | 'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', 30 | 'only_matching': True, 31 | }] 32 | 33 | _FEED_URL = 'http://www.spike.com/feeds/mrss/' 34 | _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' 35 | -------------------------------------------------------------------------------- /youtube_dl/extractor/bild.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import ( 6 | int_or_none, 7 | unescapeHTML, 8 | ) 9 | 10 | 11 | class BildIE(InfoExtractor): 12 | _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P[^/]+)-(?P\d+)(?:,auto=true)?\.bild\.html' 13 | IE_DESC = 'Bild.de' 14 | _TEST = { 15 | 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html', 16 | 'md5': 'dd495cbd99f2413502a1713a1156ac8a', 17 | 'info_dict': { 18 | 'id': '38184146', 19 | 'ext': 'mp4', 20 | 'title': 'Das können die neuen iPads', 21 | 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 22 | 'thumbnail': 're:^https?://.*\.jpg$', 23 | 'duration': 196, 24 | } 25 | } 26 | 27 | def _real_extract(self, url): 28 | video_id = self._match_id(url) 29 | 30 | video_data = self._download_json( 31 | url.split('.bild.html')[0] + ',view=json.bild.html', video_id) 32 | 33 | return { 34 | 'id': video_id, 35 | 'title': unescapeHTML(video_data['title']).strip(), 36 | 'description': unescapeHTML(video_data.get('description')), 37 | 'url': video_data['clipList'][0]['srces'][0]['src'], 38 | 'thumbnail': video_data.get('poster'), 39 | 'duration': int_or_none(video_data.get('durationSec')), 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/screencastomatic.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .jwplatform import JWPlatformBaseIE 5 | from ..utils import js_to_json 6 | 7 | 8 | class ScreencastOMaticIE(JWPlatformBaseIE): 9 | _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P[0-9a-zA-Z]+)' 10 | _TEST = { 11 | 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', 12 | 'md5': '483583cb80d92588f15ccbedd90f0c18', 13 | 'info_dict': { 14 | 'id': 'c2lD3BeOPl', 15 | 'ext': 'mp4', 16 | 'title': 'Welcome to 3-4 Philosophy @ DECV!', 17 | 'thumbnail': 're:^https?://.*\.jpg$', 18 | 'description': 'as the title says! 
also: some general info re 1) VCE philosophy and 2) distance learning.', 19 | 'duration': 369.163, 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | jwplayer_data = self._parse_json( 28 | self._search_regex( 29 | r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'), 30 | video_id, transform_source=js_to_json) 31 | 32 | info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) 33 | info_dict.update({ 34 | 'title': self._og_search_title(webpage), 35 | 'description': self._og_search_description(webpage), 36 | }) 37 | return info_dict 38 | -------------------------------------------------------------------------------- /youtube_dl/extractor/helsinki.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from .common import InfoExtractor 6 | from ..utils import js_to_json 7 | 8 | 9 | class HelsinkiIE(InfoExtractor): 10 | IE_DESC = 'helsinki.fi' 11 | _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P\d+)' 12 | _TEST = { 13 | 'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258', 14 | 'info_dict': { 15 | 'id': '20258', 16 | 'ext': 'mp4', 17 | 'title': 'Tietotekniikkafoorumi-iltapäivä', 18 | 'description': 'md5:f5c904224d43c133225130fe156a5ee0', 19 | }, 20 | 'params': { 21 | 'skip_download': True, # RTMP 22 | } 23 | } 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | webpage = self._download_webpage(url, video_id) 28 | 29 | params = self._parse_json(self._html_search_regex( 30 | r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);', 31 | webpage, 'player code'), video_id, transform_source=js_to_json) 32 | formats = [{ 33 | 'url': s['file'], 34 | 'ext': 'mp4', 35 | } for s in params['sources']] 36 | self._sort_formats(formats) 37 | 38 | return { 39 | 'id': video_id, 40 | 'title': self._og_search_title(webpage).replace('Video: ', ''), 41 | 'description': self._og_search_description(webpage), 42 | 'formats': formats, 43 | } 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/howcast.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import parse_iso8601 5 | 6 | 7 | class HowcastIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P\d+)' 9 | _TEST = { 10 | 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', 11 | 'md5': '7d45932269a288149483144f01b99789', 12 | 'info_dict': { 13 | 'id': '390161', 14 | 'ext': 'mp4', 15 | 'title': 'How to Tie a Square Knot Properly', 16 | 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3', 17 | 'timestamp': 1276081287, 18 | 'upload_date': '20100609', 19 | 'duration': 56.823, 20 | }, 21 | 'params': { 22 | 'skip_download': True, 23 | }, 24 | 'add_ie': ['Ooyala'], 25 | } 26 | 27 | def _real_extract(self, url): 28 | video_id = self._match_id(url) 29 | 30 | webpage = self._download_webpage(url, video_id) 31 | 32 | embed_code = self._search_regex( 33 | r']+src="[^"]+\bembed_code=([^\b]+)\b', 34 | webpage, 'ooyala embed code') 35 | 36 | return { 37 | '_type': 'url_transparent', 38 | 'ie_key': 'Ooyala', 39 | 'url': 'ooyala:%s' % embed_code, 40 | 'id': video_id, 41 | 'timestamp': parse_iso8601(self._html_search_meta( 42 | 
'article:published_time', webpage, 'timestamp')), 43 | } 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/thestar.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class TheStarIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P.+)\.html' 9 | _TEST = { 10 | 'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html', 11 | 'md5': '2c62dd4db2027e35579fefb97a8b6554', 12 | 'info_dict': { 13 | 'id': '4732393888001', 14 | 'ext': 'mp4', 15 | 'title': 'Mankind: Why this woman started a men\'s skin care line', 16 | 'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.', 17 | 'uploader_id': '794267642001', 18 | 'timestamp': 1454353482, 19 | 'upload_date': '20160201', 20 | }, 21 | 'params': { 22 | # m3u8 download 23 | 'skip_download': True, 24 | } 25 | } 26 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s' 27 | 28 | def _real_extract(self, url): 29 | display_id = self._match_id(url) 30 | webpage = self._download_webpage(url, display_id) 31 | brightcove_id = self._search_regex( 32 | r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', 33 | webpage, 'brightcove id') 34 | return self.url_result( 35 | self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 36 | 'BrightcoveNew', brightcove_id) 37 | -------------------------------------------------------------------------------- /devscripts/show-downloads-statistics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import itertools 5 | import json 6 | import os 7 | import re 8 | import sys 9 | 10 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 11 | 12 | from youtube_dl.compat import ( 13 | compat_print, 14 | compat_urllib_request, 15 | ) 16 | from youtube_dl.utils import format_bytes 17 | 18 | 19 | def format_size(bytes): 20 | return '%s (%d bytes)' % (format_bytes(bytes), bytes) 21 | 22 | 23 | total_bytes = 0 24 | 25 | for page in itertools.count(1): 26 | releases = json.loads(compat_urllib_request.urlopen( 27 | 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page 28 | ).read().decode('utf-8')) 29 | 30 | if not releases: 31 | break 32 | 33 | for release in releases: 34 | compat_print(release['name']) 35 | for asset in release['assets']: 36 | asset_name = asset['name'] 37 | total_bytes += asset['download_count'] * asset['size'] 38 | if all(not re.match(p, asset_name) for p in ( 39 | r'^youtube-dl$', 40 | r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', 41 | r'^youtube-dl\.exe$')): 42 | continue 43 | compat_print( 44 | ' %s size: %s downloads: %d' 45 | % (asset_name, format_size(asset['size']), asset['download_count'])) 46 | 47 | compat_print('total downloads traffic: %s' % format_size(total_bytes)) 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/freesound.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .common import InfoExtractor 6 | 7 | 8 | class FreesoundIE(InfoExtractor): 9 | _VALID_URL = 
r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P[^/]+)' 10 | _TEST = { 11 | 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', 12 | 'md5': '12280ceb42c81f19a515c745eae07650', 13 | 'info_dict': { 14 | 'id': '194503', 15 | 'ext': 'mp3', 16 | 'title': 'gulls in the city.wav', 17 | 'uploader': 'miklovan', 18 | 'description': 'the sounds of seagulls in the city', 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | mobj = re.match(self._VALID_URL, url) 24 | music_id = mobj.group('id') 25 | webpage = self._download_webpage(url, music_id) 26 | title = self._html_search_regex( 27 | r'
<div id="single_sample_header">.*?<a href="#">(.+?)</a>', 28 | webpage, 'music title', flags=re.DOTALL) 29 | description = self._html_search_regex( 30 | r'<div id="sound_description">(.*?)</div>
', webpage, 'description', 31 | fatal=False, flags=re.DOTALL) 32 | 33 | return { 34 | 'id': music_id, 35 | 'title': title, 36 | 'url': self._og_search_property('audio', webpage, 'music url'), 37 | 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), 38 | 'description': description, 39 | } 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/vodplatform.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import unescapeHTML 6 | 7 | 8 | class VODPlatformIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P[^/?#]+)' 10 | _TEST = { 11 | # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar 12 | 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', 13 | 'md5': '1db2b7249ce383d6be96499006e951fc', 14 | 'info_dict': { 15 | 'id': 'RufMcytHDolTH1MuKHY9Fw', 16 | 'ext': 'mp4', 17 | 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | title = unescapeHTML(self._og_search_title(webpage)) 26 | hidden_inputs = self._hidden_inputs(webpage) 27 | 28 | formats = self._extract_wowza_formats( 29 | hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) 30 | self._sort_formats(formats) 31 | 32 | return { 33 | 'id': video_id, 34 | 'title': title, 35 | 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage), 36 | 'formats': formats, 37 | } 38 | -------------------------------------------------------------------------------- /youtube_dl/extractor/academicearth.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .common import InfoExtractor 6 | 7 | 8 | class AcademicEarthCourseIE(InfoExtractor): 9 | _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' 10 | IE_NAME = 'AcademicEarth:Course' 11 | _TEST = { 12 | 'url': 'http://academicearth.org/playlists/laws-of-nature/', 13 | 'info_dict': { 14 | 'id': 'laws-of-nature', 15 | 'title': 'Laws of Nature', 16 | 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.', 17 | }, 18 | 'playlist_count': 3, 19 | } 20 | 21 | def _real_extract(self, url): 22 | playlist_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage(url, playlist_id) 25 | title = self._html_search_regex( 26 | r'

<h1 class="playlist-name"[^>]*?>(.*?)</h1>
', webpage, 'title') 27 | description = self._html_search_regex( 28 | r'

<p class="excerpt"[^>]*?>(.*?)</p>
', 29 | webpage, 'description', fatal=False) 30 | urls = re.findall( 31 | r'
  • \s*?', 32 | webpage) 33 | entries = [self.url_result(u) for u in urls] 34 | 35 | return { 36 | '_type': 'playlist', 37 | 'id': playlist_id, 38 | 'title': title, 39 | 'description': description, 40 | 'entries': entries, 41 | } 42 | -------------------------------------------------------------------------------- /youtube_dl/extractor/biqle.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class BIQLEIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P-?\d+_\d+)' 9 | _TESTS = [{ 10 | 'url': 'http://www.biqle.ru/watch/847655_160197695', 11 | 'md5': 'ad5f746a874ccded7b8f211aeea96637', 12 | 'info_dict': { 13 | 'id': '160197695', 14 | 'ext': 'mp4', 15 | 'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)', 16 | 'uploader': 'Andrey Rogozin', 17 | 'upload_date': '20110605', 18 | } 19 | }, { 20 | 'url': 'https://biqle.org/watch/-44781847_168547604', 21 | 'md5': '7f24e72af1db0edf7c1aaba513174f97', 22 | 'info_dict': { 23 | 'id': '168547604', 24 | 'ext': 'mp4', 25 | 'title': 'Ребенок в шоке от автоматической мойки', 26 | 'uploader': 'Dmitry Kotov', 27 | }, 28 | 'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.', 29 | }] 30 | 31 | def _real_extract(self, url): 32 | video_id = self._match_id(url) 33 | webpage = self._download_webpage(url, video_id) 34 | embed_url = self._proto_relative_url(self._search_regex( 35 | r'', webpage, 'embed url')) 36 | 37 | return { 38 | '_type': 'url_transparent', 39 | 'url': embed_url, 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/slutload.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .common import InfoExtractor 6 | 7 | 8 | class SlutloadIE(InfoExtractor): 9 | _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' 10 | _TEST = { 11 | 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 12 | 'md5': '0cf531ae8006b530bd9df947a6a0df77', 13 | 'info_dict': { 14 | 'id': 'TD73btpBqSxc', 15 | 'ext': 'mp4', 16 | 'title': 'virginie baisee en cam', 17 | 'age_limit': 18, 18 | 'thumbnail': 're:https?://.*?\.jpg' 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | mobj = re.match(self._VALID_URL, url) 24 | video_id = mobj.group('id') 25 | 26 | webpage = self._download_webpage(url, video_id) 27 | 28 | video_title = self._html_search_regex(r'

<h1><strong>([^<]+)</strong>', 29 | webpage, 'title').strip() 30 | 31 | video_url = self._html_search_regex( 32 | r'(?s)
    \ufeff)(?P.*)$' 35 | 36 | _TESTS = [{ 37 | 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 38 | 'only_matching': True, 39 | }] 40 | 41 | def _real_extract(self, url): 42 | real_url = self._match_id(url) 43 | self.report_warning( 44 | 'Your URL starts with a Byte Order Mark (BOM). ' 45 | 'Removing the BOM and looking for "%s" ...' % real_url) 46 | return self.url_result(real_url) 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/sportschau.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .wdr import WDRBaseIE 5 | from ..utils import get_element_by_attribute 6 | 7 | 8 | class SportschauIE(WDRBaseIE): 9 | IE_NAME = 'Sportschau' 10 | _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P[^/#?]+)\.html' 11 | _TEST = { 12 | 'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html', 13 | 'info_dict': { 14 | 'id': 'mdb-1140188', 15 | 'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100', 16 | 'ext': 'mp4', 17 | 'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen', 18 | 'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.', 19 | 'upload_date': '20160615', 20 | }, 21 | 'skip': 'Geo-restricted to Germany', 22 | } 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | 27 | webpage = self._download_webpage(url, video_id) 28 | title = get_element_by_attribute('class', 'headline', webpage) 29 | description = self._html_search_meta('description', webpage, 'description') 30 | 31 | info = self._extract_wdr_video(webpage, video_id) 32 | 33 | info.update({ 34 | 'title': title, 35 | 'description': description, 36 | }) 37 | 38 | return info 39 | -------------------------------------------------------------------------------- /youtube_dl/extractor/moviezine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class MoviezineIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P[^?#]+)' 11 | 12 | _TEST = { 13 | 'url': 'http://www.moviezine.se/video/205866', 14 | 'info_dict': { 15 | 'id': '205866', 16 | 'ext': 'mp4', 17 | 'title': 'Oculus - Trailer 1', 18 | 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4', 19 | 'thumbnail': 're:http://.*\.jpg', 20 | }, 21 | } 22 | 23 | def _real_extract(self, url): 24 | mobj = re.match(self._VALID_URL, url) 25 | video_id = mobj.group('id') 26 | 27 | webpage = self._download_webpage(url, video_id) 28 | jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') 29 | 30 | formats = [{ 31 | 'format_id': 'sd', 32 | 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'), 33 | 'quality': 0, 34 | 'ext': 'mp4', 35 | }] 36 | 37 | self._sort_formats(formats) 38 | 39 | return { 40 | 'id': video_id, 41 | 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'), 42 | 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'), 43 | 'formats': formats, 44 | 'description': self._og_search_description(webpage), 45 | } 46 | 
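None of the extractor classes above are meant to be instantiated directly: YoutubeDL matches the input URL against each extractor's _VALID_URL and then calls the winning extractor's _real_extract(). A minimal usage sketch, assuming only the public YoutubeDL API (youtube_dl.YoutubeDL and extract_info) and reusing the test URL from moviezine.py above:

from __future__ import unicode_literals

import youtube_dl

ydl_opts = {
    'quiet': True,  # illustrative option: suppress progress output
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    # extract_info() dispatches on _VALID_URL and returns the info dict
    # assembled by the matching extractor's _real_extract().
    info = ydl.extract_info(
        'http://www.moviezine.se/video/205866', download=False)
    print(info.get('title'))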
-------------------------------------------------------------------------------- /youtube_dl/extractor/newgrounds.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class NewgroundsIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P[0-9]+)' 8 | _TESTS = [{ 9 | 'url': 'https://www.newgrounds.com/audio/listen/549479', 10 | 'md5': 'fe6033d297591288fa1c1f780386f07a', 11 | 'info_dict': { 12 | 'id': '549479', 13 | 'ext': 'mp3', 14 | 'title': 'B7 - BusMode', 15 | 'uploader': 'Burn7', 16 | } 17 | }, { 18 | 'url': 'https://www.newgrounds.com/portal/view/673111', 19 | 'md5': '3394735822aab2478c31b1004fe5e5bc', 20 | 'info_dict': { 21 | 'id': '673111', 22 | 'ext': 'mp4', 23 | 'title': 'Dancin', 24 | 'uploader': 'Squirrelman82', 25 | }, 26 | }] 27 | 28 | def _real_extract(self, url): 29 | media_id = self._match_id(url) 30 | webpage = self._download_webpage(url, media_id) 31 | 32 | title = self._html_search_regex( 33 | r'([^>]+)', webpage, 'title') 34 | 35 | uploader = self._html_search_regex( 36 | r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) 37 | 38 | music_url = self._parse_json(self._search_regex( 39 | r'"url":("[^"]+"),', webpage, ''), media_id) 40 | 41 | return { 42 | 'id': media_id, 43 | 'title': title, 44 | 'url': music_url, 45 | 'uploader': uploader, 46 | } 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/reverbnation.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | 5 | from .common import InfoExtractor 6 | from ..utils import str_or_none 7 | 8 | 9 | class ReverbNationIE(InfoExtractor): 10 | _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' 11 | _TESTS = [{ 12 | 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 13 | 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', 14 | 'info_dict': { 15 | 'id': '16965047', 16 | 'ext': 'mp3', 17 | 'title': 'MONA LISA', 18 | 'uploader': 'ALKILADOS', 19 | 'uploader_id': '216429', 20 | 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' 21 | }, 22 | }] 23 | 24 | def _real_extract(self, url): 25 | mobj = re.match(self._VALID_URL, url) 26 | song_id = mobj.group('id') 27 | 28 | api_res = self._download_json( 29 | 'https://api.reverbnation.com/song/%s' % song_id, 30 | song_id, 31 | note='Downloading information of song %s' % song_id 32 | ) 33 | 34 | return { 35 | 'id': song_id, 36 | 'title': api_res.get('name'), 37 | 'url': api_res.get('url'), 38 | 'uploader': api_res.get('artist', {}).get('name'), 39 | 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 40 | 'thumbnail': self._proto_relative_url( 41 | api_res.get('image', api_res.get('thumbnail'))), 42 | 'ext': 'mp3', 43 | 'vcodec': 'none', 44 | } 45 | -------------------------------------------------------------------------------- /youtube_dl/extractor/fczenit.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..compat import compat_urlparse 6 | 7 | 8 | class FczenitIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P[0-9]+)' 10 | _TEST = { 11 | 'url': 'http://fc-zenit.ru/video/41044/', 12 | 'md5': 
'0e3fab421b455e970fa1aa3891e57df0', 13 | 'info_dict': { 14 | 'id': '41044', 15 | 'ext': 'mp4', 16 | 'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»', 17 | }, 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | 24 | video_title = self._html_search_regex( 25 | r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title') 26 | 27 | video_items = self._parse_json(self._search_regex( 28 | r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'), 29 | video_id) 30 | 31 | def merge_dicts(*dicts): 32 | ret = {} 33 | for a_dict in dicts: 34 | ret.update(a_dict) 35 | return ret 36 | 37 | formats = [{ 38 | 'url': compat_urlparse.urljoin(url, video_url), 39 | 'tbr': int(tbr), 40 | } for tbr, video_url in merge_dicts(*video_items).items()] 41 | 42 | self._sort_formats(formats) 43 | 44 | return { 45 | 'id': video_id, 46 | 'title': video_title, 47 | 'formats': formats, 48 | } 49 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ro220.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..compat import compat_urllib_parse_unquote 5 | 6 | 7 | class Ro220IE(InfoExtractor): 8 | IE_NAME = '220.ro' 9 | _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' 10 | _TEST = { 11 | 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/', 12 | 'md5': '03af18b73a07b4088753930db7a34add', 13 | 'info_dict': { 14 | 'id': 'LYV6doKo7f', 15 | 'ext': 'mp4', 16 | 'title': 'Luati-le Banii sez 4 ep 1', 17 | 'description': 're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage(url, video_id) 25 | url = compat_urllib_parse_unquote(self._search_regex( 26 | r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url')) 27 | title = self._og_search_title(webpage) 28 | description = self._og_search_description(webpage) 29 | thumbnail = self._og_search_thumbnail(webpage) 30 | 31 | formats = [{ 32 | 'format_id': 'sd', 33 | 'url': url, 34 | 'ext': 'mp4', 35 | }] 36 | 37 | return { 38 | 'id': video_id, 39 | 'formats': formats, 40 | 'title': title, 41 | 'description': description, 42 | 'thumbnail': thumbnail, 43 | } 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/xbef.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..compat import compat_urllib_parse_unquote 5 | 6 | 7 | class XBefIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P[0-9]+)' 9 | _TEST = { 10 | 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking', 11 | 'md5': 'a478b565baff61634a98f5e5338be995', 12 | 'info_dict': { 13 | 'id': '5119', 14 | 'ext': 'mp4', 15 | 'title': 'md5:7358a9faef8b7b57acda7c04816f170e', 16 | 'age_limit': 18, 17 | 'thumbnail': 're:^http://.*\.jpg', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | title = self._html_search_regex( 26 | r']*>(.*?)

    ', webpage, 'title') 27 | 28 | config_url_enc = self._download_webpage( 29 | 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id, 30 | note='Retrieving config URL') 31 | config_url = compat_urllib_parse_unquote(config_url_enc) 32 | config = self._download_xml( 33 | config_url, video_id, note='Retrieving config') 34 | 35 | video_url = config.find('./file').text 36 | thumbnail = config.find('./image').text 37 | 38 | return { 39 | 'id': video_id, 40 | 'url': video_url, 41 | 'title': title, 42 | 'thumbnail': thumbnail, 43 | 'age_limit': 18, 44 | } 45 | -------------------------------------------------------------------------------- /youtube_dl/extractor/miaopai.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class MiaoPaiIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+)' 9 | _TEST = { 10 | 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', 11 | 'md5': '095ed3f1cd96b821add957bdc29f845b', 12 | 'info_dict': { 13 | 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', 14 | 'ext': 'mp4', 15 | 'title': '西游记音乐会的秒拍视频', 16 | 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', 17 | } 18 | } 19 | 20 | _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1' 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage( 25 | url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD}) 26 | 27 | title = self._html_search_regex( 28 | r'([^<]+)', webpage, 'title') 29 | thumbnail = self._html_search_regex( 30 | r']+class=(?P[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P[\'"])(?P[^\'"]+)(?P=q2)', 31 | webpage, 'thumbnail', fatal=False, group='url') 32 | videos = self._parse_html5_media_entries(url, webpage, video_id) 33 | info = videos[0] 34 | 35 | info.update({ 36 | 'id': video_id, 37 | 'title': title, 38 | 'thumbnail': thumbnail, 39 | }) 40 | return info 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/nintendo.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | from .ooyala import OoyalaIE 8 | from ..utils import unescapeHTML 9 | 10 | 11 | class NintendoIE(InfoExtractor): 12 | _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P[^/?#&]+)' 13 | _TESTS = [{ 14 | 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', 15 | 'info_dict': { 16 | 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW', 17 | 'ext': 'flv', 18 | 'title': 'Duck Hunt Wii U VC NES - Trailer', 19 | 'duration': 60.326, 20 | }, 21 | 'params': { 22 | 'skip_download': True, 23 | }, 24 | 'add_ie': ['Ooyala'], 25 | }, { 26 | 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u', 27 | 'info_dict': { 28 | 'id': 'tokyo-mirage-sessions-fe-wii-u', 29 | 'title': 'Tokyo Mirage Sessions ♯FE', 30 | }, 31 | 'playlist_count': 3, 32 | }] 33 | 34 | def _real_extract(self, url): 35 | page_id = self._match_id(url) 36 | 37 | webpage = self._download_webpage(url, page_id) 38 | 39 | entries = [ 40 | OoyalaIE._build_url_result(m.group('code')) 41 | for m in re.finditer( 42 | 
r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P(?:(?!\2).)+)\2', 43 | webpage)] 44 | 45 | return self.playlist_result( 46 | entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False))) 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/vidzi.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .jwplatform import JWPlatformBaseIE 5 | from ..utils import ( 6 | decode_packed_codes, 7 | js_to_json, 8 | ) 9 | 10 | 11 | class VidziIE(JWPlatformBaseIE): 12 | _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P[0-9a-zA-Z]+)' 13 | _TESTS = [{ 14 | 'url': 'http://vidzi.tv/cghql9yq6emu.html', 15 | 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', 16 | 'info_dict': { 17 | 'id': 'cghql9yq6emu', 18 | 'ext': 'mp4', 19 | 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭', 20 | }, 21 | 'params': { 22 | # m3u8 download 23 | 'skip_download': True, 24 | }, 25 | }, { 26 | 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', 27 | 'skip_download': True, 28 | }] 29 | 30 | def _real_extract(self, url): 31 | video_id = self._match_id(url) 32 | 33 | webpage = self._download_webpage( 34 | 'http://vidzi.tv/%s' % video_id, video_id) 35 | title = self._html_search_regex( 36 | r'(?s)

<h2 class="video-title">(.*?)</h2>
    ', webpage, 'title') 37 | 38 | code = decode_packed_codes(webpage).replace('\\\'', '\'') 39 | jwplayer_data = self._parse_json( 40 | self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'), 41 | video_id, transform_source=js_to_json) 42 | 43 | info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) 44 | info_dict['title'] = title 45 | 46 | return info_dict 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/tv3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class TV3IE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx' 9 | _TEST = { 10 | 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx', 11 | 'info_dict': { 12 | 'id': '4659127992001', 13 | 'ext': 'mp4', 14 | 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3', 15 | 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.', 16 | 'uploader_id': '3812193411001', 17 | 'upload_date': '20151213', 18 | 'timestamp': 1449975272, 19 | }, 20 | 'expected_warnings': [ 21 | 'Failed to download MPD manifest' 22 | ], 23 | 'params': { 24 | # m3u8 download 25 | 'skip_download': True, 26 | }, 27 | } 28 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s' 29 | 30 | def _real_extract(self, url): 31 | display_id = self._match_id(url) 32 | webpage = self._download_webpage(url, display_id) 33 | brightcove_id = self._search_regex(r'[0-9]+)' 10 | _TEST = { 11 | 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', 12 | 'md5': '9809b4e3f115ae2088440bcb4efbf371', 13 | 'info_dict': { 14 | 'id': '12245069', 15 | 'ext': 'flv', 16 | 'title': 'Hardwood Flooring Basics', 17 | 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...', 18 | 'uploader': 'Erick Nathan', 19 | } 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | webpage = self._download_webpage(url, video_id) 25 | video_url = self._search_regex( 26 | r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') 27 | final_url = compat_urllib_parse_unquote(video_url) 28 | uploader = self._html_search_meta('uploader', webpage) 29 | title = self._og_search_title(webpage).replace(' | eHow', '') 30 | 31 | return { 32 | 'id': video_id, 33 | 'url': final_url, 34 | 'title': title, 35 | 'thumbnail': self._og_search_thumbnail(webpage), 36 | 'description': self._og_search_description(webpage), 37 | 'uploader': uploader, 38 | } 39 | -------------------------------------------------------------------------------- /youtube_dl/extractor/phoenix.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .zdf import ZDFIE 4 | 5 | 6 | class PhoenixIE(ZDFIE): 7 | IE_NAME = 'phoenix.de' 8 | _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ 9 | (?: 10 | phoenix/die_sendungen/(?:[^/]+/)? 11 | )? 
12 | (?P[0-9]+)''' 13 | _TESTS = [ 14 | { 15 | 'url': 'http://www.phoenix.de/content/884301', 16 | 'md5': 'ed249f045256150c92e72dbb70eadec6', 17 | 'info_dict': { 18 | 'id': '884301', 19 | 'ext': 'mp4', 20 | 'title': 'Michael Krons mit Hans-Werner Sinn', 21 | 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', 22 | 'upload_date': '20141025', 23 | 'uploader': 'Im Dialog', 24 | } 25 | }, 26 | { 27 | 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', 28 | 'only_matching': True, 29 | }, 30 | { 31 | 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234', 32 | 'only_matching': True, 33 | }, 34 | ] 35 | 36 | def _real_extract(self, url): 37 | video_id = self._match_id(url) 38 | webpage = self._download_webpage(url, video_id) 39 | 40 | internal_id = self._search_regex( 41 | r'
    .+)\ \-\ (?P.+)' 20 | """ 21 | lastpos = 0 22 | regex = '' 23 | # replace %(..)s with regex group and escape other string parts 24 | for match in re.finditer(r'%\((\w+)\)s', fmt): 25 | regex += re.escape(fmt[lastpos:match.start()]) 26 | regex += r'(?P<' + match.group(1) + '>.+)' 27 | lastpos = match.end() 28 | if lastpos < len(fmt): 29 | regex += re.escape(fmt[lastpos:len(fmt)]) 30 | return regex 31 | 32 | def run(self, info): 33 | title = info['title'] 34 | match = re.match(self._titleregex, title) 35 | if match is None: 36 | self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) 37 | return [], info 38 | for attribute, value in match.groupdict().items(): 39 | value = match.group(attribute) 40 | info[attribute] = value 41 | self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) 42 | 43 | return [], info 44 | -------------------------------------------------------------------------------- /youtube_dl/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import FileDownloader 4 | from .f4m import F4mFD 5 | from .hls import HlsFD 6 | from .http import HttpFD 7 | from .rtmp import RtmpFD 8 | from .dash import DashSegmentsFD 9 | from .rtsp import RtspFD 10 | from .external import ( 11 | get_external_downloader, 12 | FFmpegFD, 13 | ) 14 | 15 | from ..utils import ( 16 | determine_protocol, 17 | ) 18 | 19 | PROTOCOL_MAP = { 20 | 'rtmp': RtmpFD, 21 | 'm3u8_native': HlsFD, 22 | 'm3u8': FFmpegFD, 23 | 'mms': RtspFD, 24 | 'rtsp': RtspFD, 25 | 'f4m': F4mFD, 26 | 'http_dash_segments': DashSegmentsFD, 27 | } 28 | 29 | 30 | def get_suitable_downloader(info_dict, params={}): 31 | """Get the downloader class that can handle the info dict.""" 32 | protocol = determine_protocol(info_dict) 33 | info_dict['protocol'] = protocol 34 | 35 | # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): 36 | # return FFmpegFD 37 | 38 | external_downloader = params.get('external_downloader') 39 | if external_downloader is not None: 40 | ed = get_external_downloader(external_downloader) 41 | if ed.can_download(info_dict): 42 | return ed 43 | 44 | if protocol == 'm3u8' and params.get('hls_prefer_native') is True: 45 | return HlsFD 46 | 47 | if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: 48 | return FFmpegFD 49 | 50 | return PROTOCOL_MAP.get(protocol, HttpFD) 51 | 52 | 53 | __all__ = [ 54 | 'get_suitable_downloader', 55 | 'FileDownloader', 56 | ] 57 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ruleporn.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .nuevo import NuevoBaseIE 4 | 5 | 6 | class RulePornIE(NuevoBaseIE): 7 | _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P[^/?#&]+)' 8 | _TEST = { 9 | 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', 10 | 'md5': '86861ebc624a1097c7c10eaf06d7d505', 11 | 'info_dict': { 12 | 'id': '48212', 13 | 'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle', 14 | 'ext': 'mp4', 15 | 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle', 16 | 'description': 'md5:6d28be231b981fff1981deaaa03a04d5', 17 | 'age_limit': 18, 18 | 'duration': 635.1, 19 | } 20 | 
} 21 | 22 | def _real_extract(self, url): 23 | display_id = self._match_id(url) 24 | 25 | webpage = self._download_webpage(url, display_id) 26 | 27 | video_id = self._search_regex( 28 | r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id') 29 | 30 | title = self._search_regex( 31 | r']+title=(["\'])(?P.+?)\1', 32 | webpage, 'title', group='url') 33 | description = self._html_search_meta('description', webpage) 34 | 35 | info = self._extract_nuevo( 36 | 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, 37 | video_id) 38 | info.update({ 39 | 'display_id': display_id, 40 | 'title': title, 41 | 'description': description, 42 | 'age_limit': 18 43 | }) 44 | return info 45 | -------------------------------------------------------------------------------- /youtube_dl/extractor/odatv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import ( 6 | ExtractorError, 7 | NO_DEFAULT, 8 | remove_start 9 | ) 10 | 11 | 12 | class OdaTVIE(InfoExtractor): 13 | _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P[^&]+)' 14 | _TESTS = [{ 15 | 'url': 'http://odatv.com/vid_video.php?id=8E388', 16 | 'md5': 'dc61d052f205c9bf2da3545691485154', 17 | 'info_dict': { 18 | 'id': '8E388', 19 | 'ext': 'mp4', 20 | 'title': 'Artık Davutoğlu ile devam edemeyiz' 21 | } 22 | }, { 23 | # mobile URL 24 | 'url': 'http://odatv.com/mob_video.php?id=8E388', 25 | 'only_matching': True, 26 | }, { 27 | # no video 28 | 'url': 'http://odatv.com/mob_video.php?id=8E900', 29 | 'only_matching': True, 30 | }] 31 | 32 | def _real_extract(self, url): 33 | video_id = self._match_id(url) 34 | webpage = self._download_webpage(url, video_id) 35 | 36 | no_video = 'NO VIDEO!' in webpage 37 | 38 | video_url = self._search_regex( 39 | r'mp4\s*:\s*(["\'])(?Phttp.+?)\1', webpage, 'video url', 40 | default=None if no_video else NO_DEFAULT, group='url') 41 | 42 | if no_video: 43 | raise ExtractorError('Video %s does not exist' % video_id, expected=True) 44 | 45 | return { 46 | 'id': video_id, 47 | 'url': video_url, 48 | 'title': remove_start(self._og_search_title(webpage), 'Video: '), 49 | 'thumbnail': self._og_search_thumbnail(webpage), 50 | } 51 | -------------------------------------------------------------------------------- /youtube_dl/extractor/oktoberfesttv.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class OktoberfestTVIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P[^/?#]+)' 9 | 10 | _TEST = { 11 | 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', 12 | 'info_dict': { 13 | 'id': 'hb-zelt', 14 | 'ext': 'mp4', 15 | 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 16 | 'thumbnail': 're:^https?://.*\.jpg$', 17 | 'is_live': True, 18 | }, 19 | 'params': { 20 | 'skip_download': True, 21 | } 22 | } 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | webpage = self._download_webpage(url, video_id) 27 | 28 | title = self._live_title(self._html_search_regex( 29 | r'

<h1><strong>.*?</strong>(.*?)</h1>
    ', webpage, 'title')) 30 | 31 | clip = self._search_regex( 32 | r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') 33 | ncurl = self._search_regex( 34 | r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base') 35 | video_url = ncurl + clip 36 | thumbnail = self._search_regex( 37 | r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage, 38 | 'thumbnail', fatal=False) 39 | 40 | return { 41 | 'id': video_id, 42 | 'title': title, 43 | 'url': video_url, 44 | 'ext': 'mp4', 45 | 'is_live': True, 46 | 'thumbnail': thumbnail, 47 | } 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/parliamentliveuk.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class ParliamentLiveUKIE(InfoExtractor): 7 | IE_NAME = 'parliamentlive.tv' 8 | IE_DESC = 'UK parliament videos' 9 | _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' 10 | 11 | _TEST = { 12 | 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 13 | 'info_dict': { 14 | 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 15 | 'ext': 'mp4', 16 | 'title': 'Home Affairs Committee', 17 | 'uploader_id': 'FFMPEG-01', 18 | 'timestamp': 1422696664, 19 | 'upload_date': '20150131', 20 | }, 21 | } 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | webpage = self._download_webpage( 26 | 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id) 27 | widget_config = self._parse_json(self._search_regex( 28 | r'kWidgetConfig\s*=\s*({.+});', 29 | webpage, 'kaltura widget config'), video_id) 30 | kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id']) 31 | event_title = self._download_json( 32 | 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title'] 33 | return { 34 | '_type': 'url_transparent', 35 | 'id': video_id, 36 | 'title': event_title, 37 | 'description': '', 38 | 'url': kaltura_url, 39 | 'ie_key': 'Kaltura', 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/glide.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class GlideIE(InfoExtractor): 8 | IE_DESC = 'Glide mobile video messages (glide.me)' 9 | _VALID_URL = r'https?://share\.glide\.me/(?P[A-Za-z0-9\-=_+]+)' 10 | _TEST = { 11 | 'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==', 12 | 'md5': '4466372687352851af2d131cfaa8a4c7', 13 | 'info_dict': { 14 | 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 15 | 'ext': 'mp4', 16 | 'title': "Damon's Glide message", 17 | 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | 24 | webpage = self._download_webpage(url, video_id) 25 | 26 | title = self._html_search_regex( 27 | r'(.+?)', webpage, 28 | 'title', default=None) or self._og_search_title(webpage) 29 | video_url = self._proto_relative_url(self._search_regex( 30 | r']+src=(["\'])(?P.+?)\1', 31 | webpage, 'video URL', default=None, 32 | group='url')) or self._og_search_video_url(webpage) 33 | thumbnail = self._proto_relative_url(self._search_regex( 34 | r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', 35 | 
webpage, 'thumbnail url', default=None, 36 | group='url')) or self._og_search_thumbnail(webpage) 37 | 38 | return { 39 | 'id': video_id, 40 | 'title': title, 41 | 'url': video_url, 42 | 'thumbnail': thumbnail, 43 | } 44 | -------------------------------------------------------------------------------- /test/versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "latest": "2013.01.06", 3 | "signature": "72158cdba391628569ffdbea259afbcf279bbe3d8aeb7492690735dc1cfa6afa754f55c61196f3871d429599ab22f2667f1fec98865527b32632e7f4b3675a7ef0f0fbe084d359256ae4bba68f0d33854e531a70754712f244be71d4b92e664302aa99653ee4df19800d955b6c4149cd2b3f24288d6e4b40b16126e01f4c8ce6", 4 | "versions": { 5 | "2013.01.02": { 6 | "bin": [ 7 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl", 8 | "f5b502f8aaa77675c4884938b1e4871ebca2611813a0c0e74f60c0fbd6dcca6b" 9 | ], 10 | "exe": [ 11 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl.exe", 12 | "75fa89d2ce297d102ff27675aa9d92545bbc91013f52ec52868c069f4f9f0422" 13 | ], 14 | "tar": [ 15 | "http://youtube-dl.org/downloads/2013.01.02/youtube-dl-2013.01.02.tar.gz", 16 | "6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196" 17 | ] 18 | }, 19 | "2013.01.06": { 20 | "bin": [ 21 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl", 22 | "64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049" 23 | ], 24 | "exe": [ 25 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe", 26 | "58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84" 27 | ], 28 | "tar": [ 29 | "http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz", 30 | "fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86" 31 | ] 32 | } 33 | } 34 | } -------------------------------------------------------------------------------- /youtube_dl/extractor/thisamericanlife.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | 6 | class ThisAmericanLifeIE(InfoExtractor): 7 | _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P\d+)' 8 | _TESTS = [{ 9 | 'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one', 10 | 'md5': '8f7d2da8926298fdfca2ee37764c11ce', 11 | 'info_dict': { 12 | 'id': '487', 13 | 'ext': 'm4a', 14 | 'title': '487: Harper High School, Part One', 15 | 'description': 'md5:ee40bdf3fb96174a9027f76dbecea655', 16 | 'thumbnail': 're:^https?://.*\.jpg$', 17 | }, 18 | }, { 19 | 'url': 'http://www.thisamericanlife.org/play_full.php?play=487', 20 | 'only_matching': True, 21 | }] 22 | 23 | def _real_extract(self, url): 24 | video_id = self._match_id(url) 25 | 26 | webpage = self._download_webpage( 27 | 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id) 28 | 29 | return { 30 | 'id': video_id, 31 | 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id), 32 | 'protocol': 'm3u8_native', 33 | 'ext': 'm4a', 34 | 'acodec': 'aac', 35 | 'vcodec': 'none', 36 | 'abr': 64, 37 | 'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True), 38 | 'description': self._html_search_meta(r'description', webpage, 'description'), 39 | 'thumbnail': self._og_search_thumbnail(webpage), 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ruhd.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class RUHDIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' 9 | _TEST = { 10 | 'url': 'http://www.ruhd.ru/play.php?vid=207', 11 | 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', 12 | 'info_dict': { 13 | 'id': '207', 14 | 'ext': 'divx', 15 | 'title': 'КОТ бааааам', 16 | 'description': 'классный кот)', 17 | 'thumbnail': 're:^http://.*\.jpg$', 18 | } 19 | } 20 | 21 | def _real_extract(self, url): 22 | video_id = self._match_id(url) 23 | webpage = self._download_webpage(url, video_id) 24 | 25 | video_url = self._html_search_regex( 26 | r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', 29 | webpage, 'title') 30 | description = self._html_search_regex( 31 | r'(?s)
    (.+?)', 32 | webpage, 'description', fatal=False) 33 | thumbnail = self._html_search_regex( 34 | r'\w+)(?:/.*)?' 11 | _TEST = { 12 | 'url': 'http://videopremium.tv/4w7oadjsf156', 13 | 'info_dict': { 14 | 'id': '4w7oadjsf156', 15 | 'ext': 'f4v', 16 | 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4' 17 | }, 18 | 'params': { 19 | 'skip_download': True, 20 | }, 21 | 'skip': 'Test file has been deleted.', 22 | } 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | webpage_url = 'http://videopremium.tv/' + video_id 27 | webpage = self._download_webpage(webpage_url, video_id) 28 | 29 | if re.match(r'^]*>window.location\s*=', webpage): 30 | # Download again, we need a cookie 31 | webpage = self._download_webpage( 32 | webpage_url, video_id, 33 | note='Downloading webpage again (with cookie)') 34 | 35 | video_title = self._html_search_regex( 36 | r'\s*(.+?)\s*<', webpage, 'video title') 37 | 38 | return { 39 | 'id': video_id, 40 | 'url': 'rtmp://e%d.md.iplay.md/play' % random.randint(1, 16), 41 | 'play_path': 'mp4:%s.f4v' % video_id, 42 | 'page_url': 'http://videopremium.tv/' + video_id, 43 | 'player_url': 'http://videopremium.tv/uplayer/uppod.swf', 44 | 'ext': 'f4v', 45 | 'title': video_title, 46 | } 47 | -------------------------------------------------------------------------------- /youtube_dl/downloader/rtsp.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import os 4 | import subprocess 5 | 6 | from .common import FileDownloader 7 | from ..utils import ( 8 | check_executable, 9 | encodeFilename, 10 | ) 11 | 12 | 13 | class RtspFD(FileDownloader): 14 | def real_download(self, filename, info_dict): 15 | url = info_dict['url'] 16 | self.report_destination(filename) 17 | tmpfilename = self.temp_name(filename) 18 | 19 | if check_executable('mplayer', ['-h']): 20 | args = [ 21 | 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', 22 | '-dumpstream', '-dumpfile', tmpfilename, url] 23 | elif check_executable('mpv', ['-h']): 24 | args = [ 25 | 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] 26 | else: 27 | self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. 
Please install any.') 28 | return False 29 | 30 | self._debug_cmd(args) 31 | 32 | retval = subprocess.call(args) 33 | if retval == 0: 34 | fsize = os.path.getsize(encodeFilename(tmpfilename)) 35 | self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) 36 | self.try_rename(tmpfilename, filename) 37 | self._hook_progress({ 38 | 'downloaded_bytes': fsize, 39 | 'total_bytes': fsize, 40 | 'filename': filename, 41 | 'status': 'finished', 42 | }) 43 | return True 44 | else: 45 | self.to_stderr('\n') 46 | self.report_error('%s exited with code %d' % (args[0], retval)) 47 | return False 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/makerschannel.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class MakersChannelIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?Pvideo|production)_id=(?P[0-9]+)' 11 | _TEST = { 12 | 'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849', 13 | 'md5': '624a512c6969236b5967bf9286345ad1', 14 | 'info_dict': { 15 | 'id': '849', 16 | 'ext': 'mp4', 17 | 'title': 'Landing a bus on a plane is an epic win', 18 | 'uploader': 'ZoomIn', 19 | 'description': 'md5:cd9cca2ea7b69b78be81d07020c97139', 20 | } 21 | } 22 | 23 | def _real_extract(self, url): 24 | id_type, url_id = re.match(self._VALID_URL, url).groups() 25 | webpage = self._download_webpage(url, url_id) 26 | video_data = self._html_search_regex(r']+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data') 27 | 28 | def extract_data_val(attr, fatal=False): 29 | return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal) 30 | minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id') 31 | 32 | return { 33 | '_type': 'url_transparent', 34 | 'url': 'minoto:%s' % minoto_id, 35 | 'id': extract_data_val('video-id', True), 36 | 'title': extract_data_val('title', True), 37 | 'description': extract_data_val('description'), 38 | 'thumbnail': extract_data_val('image'), 39 | 'uploader': extract_data_val('channel'), 40 | } 41 | -------------------------------------------------------------------------------- /youtube_dl/extractor/goshgay.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..compat import ( 6 | compat_parse_qs, 7 | ) 8 | from ..utils import ( 9 | parse_duration, 10 | ) 11 | 12 | 13 | class GoshgayIE(InfoExtractor): 14 | _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P\d+?)($|/)' 15 | _TEST = { 16 | 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 17 | 'md5': '4b6db9a0a333142eb9f15913142b0ed1', 18 | 'info_dict': { 19 | 'id': '299069', 20 | 'ext': 'flv', 21 | 'title': 'DIESEL SFW XXX Video', 22 | 'thumbnail': 're:^http://.*\.jpg$', 23 | 'duration': 80, 24 | 'age_limit': 18, 25 | } 26 | } 27 | 28 | def _real_extract(self, url): 29 | video_id = self._match_id(url) 30 | webpage = self._download_webpage(url, video_id) 31 | 32 | title = self._html_search_regex( 33 | r'
<h2>
    (.*?)<', webpage, 'title') 34 | duration = parse_duration(self._html_search_regex( 35 | r'\s*-?\s*(.*?)', 36 | webpage, 'duration', fatal=False)) 37 | 38 | flashvars = compat_parse_qs(self._html_search_regex( 39 | r'\d+)' 14 | 15 | _TEST = { 16 | 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE', 17 | 'info_dict': { 18 | 'id': '5111223049001', 19 | 'ext': 'mp4', 20 | 'title': ': LES HEROS DU 88e ETAGE', 21 | 'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.', 22 | 'uploader_id': '1969646226001', 23 | 'upload_date': '20160904', 24 | 'timestamp': 1472951103, 25 | }, 26 | 'params': { 27 | # rtmp download 28 | 'skip_download': True, 29 | }, 30 | 'skip': 'Only works from France', 31 | } 32 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' 33 | 34 | def _real_extract(self, url): 35 | video_id = self._match_id(url) 36 | webpage = self._download_webpage(url, video_id) 37 | brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) 38 | brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] 39 | return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) 40 | -------------------------------------------------------------------------------- /youtube_dl/extractor/yourupload.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class YourUploadIE(InfoExtractor): 8 | _VALID_URL = r'''(?x)https?://(?:www\.)? 9 | (?:yourupload\.com/watch| 10 | embed\.yourupload\.com| 11 | embed\.yucache\.net 12 | )/(?P[A-Za-z0-9]+) 13 | ''' 14 | _TESTS = [ 15 | { 16 | 'url': 'http://yourupload.com/watch/14i14h', 17 | 'md5': '5e2c63385454c557f97c4c4131a393cd', 18 | 'info_dict': { 19 | 'id': '14i14h', 20 | 'ext': 'mp4', 21 | 'title': 'BigBuckBunny_320x180.mp4', 22 | 'thumbnail': 're:^https?://.*\.jpe?g', 23 | } 24 | }, 25 | { 26 | 'url': 'http://embed.yourupload.com/14i14h', 27 | 'only_matching': True, 28 | }, 29 | { 30 | 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', 31 | 'only_matching': True, 32 | }, 33 | ] 34 | 35 | def _real_extract(self, url): 36 | video_id = self._match_id(url) 37 | 38 | embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) 39 | webpage = self._download_webpage(embed_url, video_id) 40 | 41 | title = self._og_search_title(webpage) 42 | video_url = self._og_search_video_url(webpage) 43 | thumbnail = self._og_search_thumbnail(webpage, default=None) 44 | 45 | return { 46 | 'id': video_id, 47 | 'title': title, 48 | 'url': video_url, 49 | 'thumbnail': thumbnail, 50 | 'http_headers': { 51 | 'Referer': embed_url, 52 | }, 53 | } 54 | -------------------------------------------------------------------------------- /youtube_dl/extractor/fktv.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import ( 5 | clean_html, 6 | determine_ext, 7 | js_to_json, 8 | ) 9 | 10 | 11 | class FKTVIE(InfoExtractor): 12 | IE_NAME = 'fernsehkritik.tv' 13 | _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P[0-9]+)(?:/.*)?' 
14 | 15 | _TEST = { 16 | 'url': 'http://fernsehkritik.tv/folge-1', 17 | 'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79', 18 | 'info_dict': { 19 | 'id': '1', 20 | 'ext': 'mp4', 21 | 'title': 'Folge 1 vom 10. April 2007', 22 | 'thumbnail': 're:^https?://.*\.jpg$', 23 | }, 24 | } 25 | 26 | def _real_extract(self, url): 27 | episode = self._match_id(url) 28 | 29 | webpage = self._download_webpage( 30 | 'http://fernsehkritik.tv/folge-%s/play' % episode, episode) 31 | title = clean_html(self._html_search_regex( 32 | '<h3>([^<]+)</h3>', webpage, 'title')) 33 | thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) 34 | sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) 35 | 36 | formats = [] 37 | for source in sources: 38 | furl = source.get('src') 39 | if furl: 40 | formats.append({ 41 | 'url': furl, 42 | 'format_id': determine_ext(furl), 43 | }) 44 | self._sort_formats(formats) 45 | 46 | return { 47 | 'id': episode, 48 | 'title': title, 49 | 'formats': formats, 50 | 'thumbnail': thumbnail, 51 | } 52 | -------------------------------------------------------------------------------- /youtube_dl/extractor/historicfilms.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import parse_duration 5 | 6 | 7 | class HistoricFilmsIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)' 9 | _TEST = { 10 | 'url': 'http://www.historicfilms.com/tapes/4728', 11 | 'md5': 'd4a437aec45d8d796a38a215db064e9a', 12 | 'info_dict': { 13 | 'id': '4728', 14 | 'ext': 'mov', 15 | 'title': 'Historic Films: GP-7', 16 | 'description': 'md5:1a86a0f3ac54024e419aba97210d959a', 17 | 'thumbnail': 're:^https?://.*\.jpg$', 18 | 'duration': 2096, 19 | }, 20 | } 21 | 22 | def _real_extract(self, url): 23 | video_id = self._match_id(url) 24 | 25 | webpage = self._download_webpage(url, video_id) 26 | 27 | tape_id = self._search_regex( 28 | [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'], 29 | webpage, 'tape id') 30 | 31 | title = self._og_search_title(webpage) 32 | description = self._og_search_description(webpage) 33 | thumbnail = self._html_search_meta( 34 | 'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage) 35 | duration = parse_duration(self._html_search_meta( 36 | 'duration', webpage, 'duration')) 37 | 38 | video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id) 39 | 40 | return { 41 | 'id': video_id, 42 | 'url': video_url, 43 | 'title': title, 44 | 'description': description, 45 | 'thumbnail': thumbnail, 46 | 'duration': duration, 47 | } 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/xnxx.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..compat import compat_urllib_parse_unquote 6 | 7 | 8 | class XNXXIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' 10 | _TESTS = [{ 11 | 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', 12 | 'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0', 13 | 'info_dict': { 14 | 'id': '55awb78', 15 | 'ext': 'flv', 16 | 'title': 'Skyrim Test Video', 17 | 'age_limit': 18, 18 | }, 19 | }, { 20 | 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', 21 | 'only_matching': True, 22 | }, { 23 | 'url': 'http://www.xnxx.com/video-55awb78/', 24 | 'only_matching': True, 25 | }] 26 | 27 | def _real_extract(self, url): 28 | video_id = self._match_id(url) 29 | webpage = self._download_webpage(url, video_id) 30 | 31 | video_url = self._search_regex(r'flv_url=(.*?)&', 32 | webpage, 'video URL') 33 | video_url = compat_urllib_parse_unquote(video_url) 34 | 35 | video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', 36
| webpage, 'title') 37 | 38 | video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&', 39 | webpage, 'thumbnail', fatal=False) 40 | 41 | return { 42 | 'id': video_id, 43 | 'url': video_url, 44 | 'title': video_title, 45 | 'ext': 'flv', 46 | 'thumbnail': video_thumbnail, 47 | 'age_limit': 18, 48 | } 49 | -------------------------------------------------------------------------------- /youtube_dl/extractor/rudo.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .jwplatform import JWPlatformBaseIE 7 | from ..utils import ( 8 | js_to_json, 9 | get_element_by_class, 10 | unified_strdate, 11 | ) 12 | 13 | 14 | class RudoIE(JWPlatformBaseIE): 15 | _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' 16 | 17 | _TEST = { 18 | 'url': 'http://rudo.video/vod/oTzw0MGnyG', 19 | 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', 20 | 'info_dict': { 21 | 'id': 'oTzw0MGnyG', 22 | 'ext': 'mp4', 23 | 'title': 'Comentario Tomás Mosciatti', 24 | 'upload_date': '20160617', 25 | }, 26 | } 27 | 28 | @classmethod 29 | def _extract_url(self, webpage): 30 | mobj = re.search( 31 | '<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', 32 | webpage) 33 | if mobj: 34 | return mobj.group('url') 35 | 36 | def _real_extract(self, url): 37 | video_id = self._match_id(url) 38 | 39 | webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') 40 | 41 | jwplayer_data = self._parse_json(self._search_regex( 42 | r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, 43 | transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) 44 | 45 | info_dict = self._parse_jwplayer_data( 46 | jwplayer_data, video_id, require_title=False, m3u8_id='hls') 47 | 48 | info_dict.update({ 49 | 'title': self._og_search_title(webpage), 50 | 'upload_date': unified_strdate(get_element_by_class('date', webpage)), 51 | }) 52 | 53 | return info_dict 54 | -------------------------------------------------------------------------------- /devscripts/fish-completion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | import optparse 5 | import os 6 | from os.path import dirname as dirn 7 | import sys 8 | 9 | sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) 10 | import youtube_dl 11 | from youtube_dl.utils import shell_quote 12 | 13 | FISH_COMPLETION_FILE = 'youtube-dl.fish' 14 | FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' 15 | 16 | EXTRA_ARGS = { 17 | 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], 18 | 19 | # Options that need a file parameter 20 | 'download-archive': ['--require-parameter'], 21 | 'cookies': ['--require-parameter'], 22 | 'load-info': ['--require-parameter'], 23 | 'batch-file': ['--require-parameter'], 24 | } 25 | 26 | 27 | def build_completion(opt_parser): 28 | commands = [] 29 | 30 | for group in opt_parser.option_groups: 31 | for option in group.option_list: 32 | long_option = option.get_opt_string().strip('-') 33 | complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option] 34 | if option._short_opts: 35 | complete_cmd += ['--short-option', option._short_opts[0].strip('-')] 36 | if option.help != optparse.SUPPRESS_HELP: 37 | complete_cmd += ['--description', option.help] 38 | complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) 39 | 
commands.append(shell_quote(complete_cmd)) 40 | 41 | with open(FISH_COMPLETION_TEMPLATE) as f: 42 | template = f.read() 43 | filled_template = template.replace('{{commands}}', '\n'.join(commands)) 44 | with open(FISH_COMPLETION_FILE, 'w') as f: 45 | f.write(filled_template) 46 | 47 | parser = youtube_dl.parseOpts()[0] 48 | build_completion(parser) 49 | -------------------------------------------------------------------------------- /youtube_dl/extractor/streetvoice.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..compat import compat_str 6 | from ..utils import unified_strdate 7 | 8 | 9 | class StreetVoiceIE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)' 11 | _TESTS = [{ 12 | 'url': 'http://streetvoice.com/skippylu/songs/94440/', 13 | 'md5': '15974627fc01a29e492c98593c2fd472', 14 | 'info_dict': { 15 | 'id': '94440', 16 | 'ext': 'mp3', 17 | 'title': '輸', 18 | 'description': 'Crispy脆樂團 - 輸', 19 | 'thumbnail': 're:^https?://.*\.jpg$', 20 | 'duration': 260, 21 | 'upload_date': '20091018', 22 | 'uploader': 'Crispy脆樂團', 23 | 'uploader_id': '627810', 24 | } 25 | }, { 26 | 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/', 27 | 'only_matching': True, 28 | }] 29 | 30 | def _real_extract(self, url): 31 | song_id = self._match_id(url) 32 | 33 | song = self._download_json( 34 | 'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'') 35 | 36 | title = song['name'] 37 | author = song['user']['nickname'] 38 | 39 | return { 40 | 'id': song_id, 41 | 'url': song['file'], 42 | 'title': title, 43 | 'description': '%s - %s' % (author, title), 44 | 'thumbnail': self._proto_relative_url(song.get('image'), 'http:'), 45 | 'duration': song.get('length'), 46 | 'upload_date': unified_strdate(song.get('created_at')), 47 | 'uploader': author, 48 | 'uploader_id': compat_str(song['user']['id']), 49 | } 50 | -------------------------------------------------------------------------------- /test/test_cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import shutil 7 | 8 | # Allow direct execution 9 | import os 10 | import sys 11 | import unittest 12 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 13 | 14 | 15 | from test.helper import FakeYDL 16 | from youtube_dl.cache import Cache 17 | 18 | 19 | def _is_empty(d): 20 | return not bool(os.listdir(d)) 21 | 22 | 23 | def _mkdir(d): 24 | if not os.path.exists(d): 25 | os.mkdir(d) 26 | 27 | 28 | class TestCache(unittest.TestCase): 29 | def setUp(self): 30 | TEST_DIR = os.path.dirname(os.path.abspath(__file__)) 31 | TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') 32 | _mkdir(TESTDATA_DIR) 33 | self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') 34 | self.tearDown() 35 | 36 | def tearDown(self): 37 | if os.path.exists(self.test_dir): 38 | shutil.rmtree(self.test_dir) 39 | 40 | def test_cache(self): 41 | ydl = FakeYDL({ 42 | 'cachedir': self.test_dir, 43 | }) 44 | c = Cache(ydl) 45 | obj = {'x': 1, 'y': ['ä', '\\a', True]} 46 | self.assertEqual(c.load('test_cache', 'k.'), None) 47 | c.store('test_cache', 'k.', obj) 48 | self.assertEqual(c.load('test_cache', 'k2'), None) 49 | self.assertFalse(_is_empty(self.test_dir)) 50 | self.assertEqual(c.load('test_cache', 'k.'), obj) 
51 | self.assertEqual(c.load('test_cache', 'y'), None) 52 | self.assertEqual(c.load('test_cache2', 'k.'), None) 53 | c.remove() 54 | self.assertFalse(os.path.exists(self.test_dir)) 55 | self.assertEqual(c.load('test_cache', 'k.'), None) 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /youtube_dl/extractor/tlc.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | from .brightcove import BrightcoveLegacyIE 8 | from ..compat import ( 9 | compat_parse_qs, 10 | compat_urlparse, 11 | ) 12 | 13 | 14 | class TlcDeIE(InfoExtractor): 15 | IE_NAME = 'tlc.de' 16 | _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' 17 | 18 | _TEST = { 19 | 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', 20 | 'info_dict': { 21 | 'id': '3235167922001', 22 | 'ext': 'mp4', 23 | 'title': 'Breaking Amish: Die Welt da draußen', 24 | 'description': ( 25 | 'Vier Amische und eine Mennonitin wagen in New York' 26 | ' den Sprung in ein komplett anderes Leben. Begleitet sie auf' 27 | ' ihrem spannenden Weg.'), 28 | 'timestamp': 1396598084, 29 | 'upload_date': '20140404', 30 | 'uploader_id': '1659832546', 31 | }, 32 | } 33 | BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s' 34 | 35 | def _real_extract(self, url): 36 | mobj = re.match(self._VALID_URL, url) 37 | brightcove_id = mobj.group('id') 38 | if not brightcove_id: 39 | title = mobj.group('title') 40 | webpage = self._download_webpage(url, title) 41 | brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) 42 | brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] 43 | return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) 44 | -------------------------------------------------------------------------------- /youtube_dl/extractor/sztvhu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class SztvHuIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)' 9 | _TEST = { 10 | 'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909', 11 | 'md5': 'a6df607b11fb07d0e9f2ad94613375cb', 12 | 'info_dict': { 13 | 'id': '20130909', 14 | 'ext': 'mp4', 15 | 'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren', 16 | 'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...', 17 | }, 18 | } 19 | 20 | def _real_extract(self, url): 21 | video_id = self._match_id(url) 22 | webpage = self._download_webpage(url, video_id) 23 | video_file = self._search_regex( 24 | r'file: "...:(.*?)",', webpage, 'video file') 25 | title = self._html_search_regex( 26 | r'<meta name="title" content="([^"]*?) - [^-]*? 
- [^-]*?"', 27 | webpage, 'video title') 28 | description = self._html_search_regex( 29 | r'<meta name="description" content="([^"]*)"/>', 30 | webpage, 'video description', fatal=False) 31 | thumbnail = self._og_search_thumbnail(webpage) 32 | 33 | video_url = 'http://media.sztv.hu/vod/' + video_file 34 | 35 | return { 36 | 'id': video_id, 37 | 'url': video_url, 38 | 'title': title, 39 | 'description': description, 40 | 'thumbnail': thumbnail, 41 | } 42 | -------------------------------------------------------------------------------- /youtube_dl/extractor/behindkink.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | from ..utils import url_basename 8 | 9 | 10 | class BehindKinkIE(InfoExtractor): 11 | _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' 12 | _TEST = { 13 | 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', 14 | 'md5': '507b57d8fdcd75a41a9a7bdb7989c762', 15 | 'info_dict': { 16 | 'id': '37127', 17 | 'ext': 'mp4', 18 | 'title': 'What are you passionate about – Marley Blaze', 19 | 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', 20 | 'upload_date': '20141205', 21 | 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 22 | 'age_limit': 18, 23 | } 24 | } 25 | 26 | def _real_extract(self, url): 27 | mobj = re.match(self._VALID_URL, url) 28 | display_id = mobj.group('id') 29 | 30 | webpage = self._download_webpage(url, display_id) 31 | 32 | video_url = self._search_regex( 33 | r'<source src="([^"]+)"', webpage, 'video URL') 34 | video_id = url_basename(video_url).split('_')[0] 35 | upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day') 36 | 37 | return { 38 | 'id': video_id, 39 | 'display_id': display_id, 40 | 'url': video_url, 41 | 'title': self._og_search_title(webpage), 42 | 'thumbnail': self._og_search_thumbnail(webpage), 43 | 'description': self._og_search_description(webpage), 44 | 'upload_date': upload_date, 45 | 'age_limit': 18, 46 | } 47 | -------------------------------------------------------------------------------- /youtube_dl/extractor/tvnoe.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .jwplatform import JWPlatformBaseIE 5 | from ..utils import ( 6 | clean_html, 7 | get_element_by_class, 8 | js_to_json, 9 | ) 10 | 11 | 12 | class TVNoeIE(JWPlatformBaseIE): 13 | _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' 14 | _TEST = { 15 | 'url': 'http://www.tvnoe.cz/video/10362', 16 | 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', 17 | 'info_dict': { 18 | 'id': '10362', 19 | 'ext': 'mp4', 20 | 'series': 'Noční univerzita', 21 | 'title': 'prof. Tomáš Halík, Th.D. 
- Návrat náboženství a střet civilizací', 22 | 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', 23 | } 24 | } 25 | 26 | def _real_extract(self, url): 27 | video_id = self._match_id(url) 28 | webpage = self._download_webpage(url, video_id) 29 | 30 | iframe_url = self._search_regex( 31 | r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL') 32 | 33 | ifs_page = self._download_webpage(iframe_url, video_id) 34 | jwplayer_data = self._parse_json( 35 | self._find_jwplayer_data(ifs_page), 36 | video_id, transform_source=js_to_json) 37 | info_dict = self._parse_jwplayer_data( 38 | jwplayer_data, video_id, require_title=False, base_url=iframe_url) 39 | 40 | info_dict.update({ 41 | 'id': video_id, 42 | 'title': clean_html(get_element_by_class( 43 | 'field-name-field-podnazev', webpage)), 44 | 'description': clean_html(get_element_by_class( 45 | 'field-name-body', webpage)), 46 | 'series': clean_html(get_element_by_class('title', webpage)) 47 | }) 48 | 49 | return info_dict 50 | -------------------------------------------------------------------------------- /youtube_dl/extractor/footyroom.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class FootyRoomIE(InfoExtractor): 8 | _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)' 9 | _TESTS = [{ 10 | 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', 11 | 'info_dict': { 12 | 'id': 'schalke-04-0-2-real-madrid-2015-02', 13 | 'title': 'Schalke 04 0 – 2 Real Madrid', 14 | }, 15 | 'playlist_count': 3, 16 | 'skip': 'Video for this match is not available', 17 | }, { 18 | 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', 19 | 'info_dict': { 20 | 'id': 'georgia-0-2-germany-2015-03', 21 | 'title': 'Georgia 0 – 2 Germany', 22 | }, 23 | 'playlist_count': 1, 24 | }] 25 | 26 | def _real_extract(self, url): 27 | playlist_id = self._match_id(url) 28 | 29 | webpage = self._download_webpage(url, playlist_id) 30 | 31 | playlist = self._parse_json( 32 | self._search_regex( 33 | r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), 34 | playlist_id) 35 | 36 | playlist_title = self._og_search_title(webpage) 37 | 38 | entries = [] 39 | for video in playlist: 40 | payload = video.get('payload') 41 | if not payload: 42 | continue 43 | playwire_url = self._search_regex( 44 | r'data-config="([^"]+)"', payload, 45 | 'playwire url', default=None) 46 | if playwire_url: 47 | entries.append(self.url_result(self._proto_relative_url( 48 | playwire_url, 'http:'), 'Playwire')) 49 | 50 | return self.playlist_result(entries, playlist_id, playlist_title) 51 | -------------------------------------------------------------------------------- /youtube_dl/extractor/weiqitv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | 6 | 7 | class WeiqiTVIE(InfoExtractor): 8 | IE_DESC = 'WQTV' 9 | _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' 10 | 11 | _TESTS = [{ 12 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', 13 | 'md5': '26450599afd64c513bc77030ad15db44', 14 | 'info_dict': { 15 | 'id': '53c744f09874f0e76a8b46f3', 16 | 'ext': 'mp4', 17 | 'title': '2013年度盘点', 18 | }, 19 | }, { 20 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569', 21 | 'info_dict': { 
22 | 'id': '567379a2d4c36cca518b4569', 23 | 'ext': 'mp4', 24 | 'title': '民国围棋史', 25 | }, 26 | }, { 27 | 'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567', 28 | 'info_dict': { 29 | 'id': '5430220a9874f088658b4567', 30 | 'ext': 'mp4', 31 | 'title': '二路托过的手段和运用', 32 | }, 33 | }] 34 | 35 | def _real_extract(self, url): 36 | media_id = self._match_id(url) 37 | page = self._download_webpage(url, media_id) 38 | 39 | info_json_str = self._search_regex( 40 | 'var\s+video\s*=\s*(.+});', page, 'info json str') 41 | info_json = self._parse_json(info_json_str, media_id) 42 | 43 | letvcloud_url = self._search_regex( 44 | 'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url') 45 | 46 | return { 47 | '_type': 'url_transparent', 48 | 'ie_key': 'LetvCloud', 49 | 'url': letvcloud_url, 50 | 'title': info_json['name'], 51 | 'id': media_id, 52 | } 53 | -------------------------------------------------------------------------------- /youtube_dl/extractor/ir90tv.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import remove_start 6 | 7 | 8 | class Ir90TvIE(InfoExtractor): 9 | _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*' 10 | _TESTS = [{ 11 | 'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', 12 | 'md5': '411dbd94891381960cb9e13daa47a869', 13 | 'info_dict': { 14 | 'id': '95719', 15 | 'ext': 'mp4', 16 | 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18', 17 | 'thumbnail': 're:^https?://.*\.jpg$', 18 | } 19 | }, { 20 | 'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', 21 | 'only_matching': True, 22 | }] 23 | 24 | def _real_extract(self, url): 25 | video_id = self._match_id(url) 26 | webpage = self._download_webpage(url, video_id) 27 | 28 | title = remove_start(self._html_search_regex( 29 | r'<title>([^<]+)', webpage, 'title'), '90tv.ir :: ') 30 | 31 | video_url = self._search_regex( 32 | r']+src="([^"]+)"', webpage, 'video url') 33 | 34 | thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False) 35 | 36 | return { 37 | 'url': video_url, 38 | 'id': video_id, 39 | 'title': title, 40 | 'video_url': video_url, 41 | 'thumbnail': thumbnail, 42 | } 43 | -------------------------------------------------------------------------------- /youtube_dl/extractor/cinchcast.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | from .common import InfoExtractor 5 | from ..utils import ( 6 | unified_strdate, 7 | xpath_text, 8 | ) 9 | 10 | 11 | class CinchcastIE(InfoExtractor): 12 | _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P[0-9]+)' 13 | _TEST = { 14 | # Actual test is run in generic, look for undergroundwellness 15 | 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', 16 | 'only_matching': True, 17 | } 18 | 19 | def _real_extract(self, url): 20 | video_id = self._match_id(url) 21 | doc = self._download_xml( 22 | 
'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, 23 | video_id) 24 | 25 | item = doc.find('.//item') 26 | title = xpath_text(item, './title', fatal=True) 27 | date_str = xpath_text( 28 | item, './{http://developer.longtailvideo.com/trac/}date') 29 | upload_date = unified_strdate(date_str, day_first=False) 30 | # duration is present but wrong 31 | formats = [{ 32 | 'format_id': 'main', 33 | 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], 34 | }] 35 | backup_url = xpath_text( 36 | item, './{http://developer.longtailvideo.com/trac/}backupContent') 37 | if backup_url: 38 | formats.append({ 39 | 'preference': 2, # seems to be more reliable 40 | 'format_id': 'backup', 41 | 'url': backup_url, 42 | }) 43 | self._sort_formats(formats) 44 | 45 | return { 46 | 'id': video_id, 47 | 'title': title, 48 | 'upload_date': upload_date, 49 | 'formats': formats, 50 | } 51 | -------------------------------------------------------------------------------- /youtube_dl/extractor/thescene.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | 5 | from ..compat import compat_urlparse 6 | from ..utils import qualities 7 | 8 | 9 | class TheSceneIE(InfoExtractor): 10 | _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' 11 | 12 | _TEST = { 13 | 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', 14 | 'info_dict': { 15 | 'id': '520e8faac2b4c00e3c6e5f43', 16 | 'ext': 'mp4', 17 | 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', 18 | 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', 19 | }, 20 | } 21 | 22 | def _real_extract(self, url): 23 | display_id = self._match_id(url) 24 | 25 | webpage = self._download_webpage(url, display_id) 26 | 27 | player_url = compat_urlparse.urljoin( 28 | url, 29 | self._html_search_regex( 30 | r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) 31 | 32 | player = self._download_webpage(player_url, display_id) 33 | info = self._parse_json( 34 | self._search_regex( 35 | r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), 36 | display_id) 37 | 38 | qualities_order = qualities(('low', 'high')) 39 | formats = [{ 40 | 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), 41 | 'url': f['src'], 42 | 'quality': qualities_order(f['quality']), 43 | } for f in info['sources'][0]] 44 | self._sort_formats(formats) 45 | 46 | return { 47 | 'id': info['id'], 48 | 'display_id': display_id, 49 | 'title': info['title'], 50 | 'formats': formats, 51 | 'thumbnail': info.get('poster_frame'), 52 | } 53 | -------------------------------------------------------------------------------- /youtube_dl/extractor/localnews8.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class LocalNews8IE(InfoExtractor): 10 | _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)' 11 | _TEST = { 12 | 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', 13 | 'md5': 'be4d48aea61aa2bde7be2ee47691ad20', 14 | 'info_dict': { 15 | 'id': '35183304', 16 | 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings', 17 | 'ext': 'mp4', 18 | 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', 19 | 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', 20 | 'duration': 153, 21 | 'timestamp': 1441844822, 22 | 'upload_date': '20150910', 23 | 'uploader_id': 'api', 24 | } 25 | } 26 | 27 | def _real_extract(self, url): 28 | mobj = re.match(self._VALID_URL, url) 29 | video_id = mobj.group('id') 30 | display_id = mobj.group('display_id') 31 | 32 | webpage = self._download_webpage(url, display_id) 33 | 34 | partner_id = self._search_regex( 35 | r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1', 36 | webpage, 'partner id', group='id') 37 | kaltura_id = self._search_regex( 38 | r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1', 39 | webpage, 'video id', group='id') 40 | 41 | return { 42 | '_type': 'url_transparent', 43 | 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), 44 | 'ie_key': 'Kaltura', 45 | 'id': video_id, 46 | 'display_id': display_id, 47 | } 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/charlierose.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from .common import InfoExtractor 4 | from ..utils import remove_end 5 | 6 | 7 | class CharlieRoseIE(InfoExtractor): 8 | _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' 9 | _TESTS = [{ 10 | 'url': 'https://charlierose.com/videos/27996', 11 | 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', 12 | 'info_dict': { 13 | 'id': '27996', 14 | 'ext': 'mp4', 15 | 'title': 'Remembering Zaha Hadid', 16 | 'thumbnail': 're:^https?://.*\.jpg\?\d+', 17 | 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', 18 | 'subtitles': { 19 | 'en': [{ 20 | 'ext': 'vtt', 21 | }], 22 | }, 23 | }, 24 | }, { 25 | 'url': 'https://charlierose.com/videos/27996', 26 | 'only_matching': True, 27 | }] 28 | 29 | _PLAYER_BASE = 'https://charlierose.com/video/player/%s' 30 | 31 | def _real_extract(self, url): 32 | video_id = self._match_id(url) 33 | webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) 34 | 35 | title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') 36 | 37 | info_dict = self._parse_html5_media_entries( 38 | self._PLAYER_BASE % video_id, webpage, video_id, 39 | m3u8_entry_protocol='m3u8_native')[0] 40 | 41 | self._sort_formats(info_dict['formats']) 42 | self._remove_duplicate_formats(info_dict['formats']) 43 | 44 | info_dict.update({ 45 | 'id': video_id, 46 | 'title': title, 47 | 'thumbnail': self._og_search_thumbnail(webpage), 48 | 'description': self._og_search_description(webpage), 49 | }) 50 | 51 | return info_dict 52 | -------------------------------------------------------------------------------- /youtube_dl/extractor/morningstar.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | 6 | from .common import InfoExtractor 7 | 8 | 9 | class MorningstarIE(InfoExtractor): 10 | IE_DESC = 'morningstar.com' 11 | _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)' 12 | _TEST = { 13 | 'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869', 14 | 'md5': '6c0acface7a787aadc8391e4bbf7b0f5', 15 | 'info_dict': { 16 | 'id': '615869', 17 | 'ext': 'mp4', 18 | 'title': 'Get Ahead of the Curve on 2013 Taxes', 19 | 'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
20 | 'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$' 21 | } 22 | } 23 | 24 | def _real_extract(self, url): 25 | mobj = re.match(self._VALID_URL, url) 26 | video_id = mobj.group('id') 27 | 28 | webpage = self._download_webpage(url, video_id) 29 | title = self._html_search_regex( 30 | r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title') 31 | video_url = self._html_search_regex( 32 | r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"', 33 | webpage, 'video URL') 34 | thumbnail = self._html_search_regex( 35 | r'<input type="hidden" id="hidSnapshot" value="([^"]+)"', 36 | webpage, 'thumbnail', fatal=False) 37 | description = self._html_search_regex( 38 | r'<div id="mstarDeck".*?>(.*?)</div>', 39 | webpage, 'description', fatal=False) 40 | 41 | return { 42 | 'id': video_id, 43 | 'title': title, 44 | 'url': video_url, 45 | 'thumbnail': thumbnail, 46 | 'description': description, 47 | } 48 | -------------------------------------------------------------------------------- /youtube_dl/extractor/kankan.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import re 4 | import hashlib 5 | 6 | from .common import InfoExtractor 7 | 8 | _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() 9 | 10 | 11 | class KankanIE(InfoExtractor): 12 | _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml' 13 | 14 | _TEST = { 15 | 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', 16 | 'md5': '29aca1e47ae68fc28804aca89f29507e', 17 | 'info_dict': { 18 | 'id': '48863', 19 | 'ext': 'flv', 20 | 'title': 'Ready To Go', 21 | }, 22 | 'skip': 'Only available from China', 23 | } 24 | 25 | def _real_extract(self, url): 26 | video_id = self._match_id(url) 27 | webpage = self._download_webpage(url, video_id) 28 | 29 | title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') 30 | surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0) 31 | gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls) 32 | gcid = gcids[-1] 33 | 34 | info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid 35 | video_info_page = self._download_webpage( 36 | info_url, video_id, 'Downloading video url info') 37 | ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip') 38 | path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path') 39 | param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1') 40 | param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2') 41 | key = _md5('xl_mp43651' + param1 + param2) 42 | video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2) 43 | 44 | return { 45 | 'id': video_id, 46 | 'title': title, 47 | 'url': video_url, 48 | } 49 | --------------------------------------------------------------------------------