├── tests ├── __init__.py ├── test_util.py ├── test_common.py └── test.py ├── src └── you_get │ ├── util │ ├── __init__.py │ ├── term.py │ ├── strings.py │ ├── fs.py │ ├── git.py │ └── log.py │ ├── cli_wrapper │ ├── __init__.py │ ├── player │ │ ├── wmp.py │ │ ├── mplayer.py │ │ ├── dragonplayer.py │ │ ├── gnome_mplayer.py │ │ ├── vlc.py │ │ ├── __init__.py │ │ └── __main__.py │ ├── openssl │ │ └── __init__.py │ ├── transcoder │ │ ├── libav.py │ │ ├── __init__.py │ │ ├── ffmpeg.py │ │ └── mencoder.py │ └── downloader │ │ └── __init__.py │ ├── version.py │ ├── processor │ ├── __init__.py │ ├── join_ts.py │ └── rtmpdump.py │ ├── __init__.py │ ├── extractors │ ├── khan.py │ ├── alive.py │ ├── archive.py │ ├── cbs.py │ ├── freesound.py │ ├── bandcamp.py │ ├── magisto.py │ ├── quanmin.py │ ├── heavymusic.py │ ├── ted.py │ ├── giphy.py │ ├── metacafe.py │ ├── mixcloud.py │ ├── iqilu.py │ ├── douyin.py │ ├── theplatform.py │ ├── facebook.py │ ├── huomaotv.py │ ├── musicplayon.py │ ├── interest.py │ ├── ehow.py │ ├── vine.py │ ├── vidto.py │ ├── baomihua.py │ ├── dailymotion.py │ ├── yizhibo.py │ ├── naver.py │ ├── kuaishou.py │ ├── suntv.py │ ├── iwara.py │ ├── kuwo.py │ ├── veoh.py │ ├── w56.py │ ├── joy.py │ ├── panda.py │ ├── videomega.py │ ├── qingting.py │ ├── soundcloud.py │ ├── mtv81.py │ ├── qq_egame.py │ ├── pinterest.py │ ├── ifeng.py │ ├── nicovideo.py │ ├── yinyuetai.py │ ├── fantasy.py │ ├── infoq.py │ ├── nanagogo.py │ ├── miomio.py │ ├── kugou.py │ ├── miaopai.py │ ├── __init__.py │ ├── zhanqi.py │ ├── instagram.py │ ├── douban.py │ ├── lizhi.py │ ├── huaban.py │ ├── toutiao.py │ ├── fc2video.py │ ├── vk.py │ ├── bigthink.py │ ├── longzhu.py │ ├── tucao.py │ ├── cntv.py │ ├── imgur.py │ ├── douyutv.py │ ├── ku6.py │ ├── qie_video.py │ ├── sohu.py │ ├── bokecc.py │ ├── showroom.py │ ├── pixnet.py │ ├── qie.py │ ├── yixia.py │ ├── ckplayer.py │ ├── ixigua.py │ ├── twitter.py │ ├── coub.py │ ├── ximalaya.py │ ├── dilidili.py │ ├── tudou.py │ └── universal.py │ 
├── json_output.py │ └── __main__.py ├── MANIFEST.in ├── setup.cfg ├── you-get.plugin.zsh ├── you-get ├── .travis.yml ├── Makefile ├── contrib └── completion │ ├── you-get-completion.bash │ ├── _you-get │ └── you-get.fish ├── .gitignore ├── LICENSE.txt ├── setup.py ├── you-get.json ├── CONTRIBUTING.md ├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/wmp.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/openssl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/libav.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/dragonplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/gnome_mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/ffmpeg.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/mencoder.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/vlc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .mplayer import * 4 | -------------------------------------------------------------------------------- /src/you_get/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | script_name = 'you-get' 4 | __version__ = '0.4.1040' 5 | -------------------------------------------------------------------------------- /MANIFEST.in: 
-------------------------------------------------------------------------------- 1 | include *.rst 2 | include *.txt 3 | include Makefile 4 | include README.md 5 | include you-get 6 | include you-get.json 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /you-get.plugin.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | alias you-get="noglob python3 $(dirname $0)/you-get" 3 | alias you-vlc="noglob python3 $(dirname $0)/you-get --player vlc" 4 | -------------------------------------------------------------------------------- /src/you_get/processor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .join_flv import concat_flv 4 | from .join_mp4 import concat_mp4 5 | from .ffmpeg import * 6 | from .rtmpdump import * 7 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' WIP 4 | def main(): 5 | script_main('you-get', any_download, any_download_playlist) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | ''' 10 | -------------------------------------------------------------------------------- /src/you_get/util/term.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def get_terminal_size(): 4 | """Get (width, height) of the current terminal.""" 5 | try: 6 | import fcntl, termios, struct # fcntl module only available on Unix 7 | 
return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) 8 | except: 9 | return (40, 80) 10 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.util.fs import * 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_legitimize(self): 9 | self.assertEqual(legitimize("1*2", os="Linux"), "1*2") 10 | self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") 11 | self.assertEqual(legitimize("1*2", os="Windows"), "1-2") 12 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.common import * 6 | 7 | class TestCommon(unittest.TestCase): 8 | 9 | def test_match1(self): 10 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') 11 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) 12 | -------------------------------------------------------------------------------- /src/you_get/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 3 | 4 | import sys 5 | 6 | if sys.version_info[0] == 3: 7 | #from .extractor import Extractor, VideoExtractor 8 | #from .util import log 9 | 10 | from .__main__ import * 11 | 12 | #from .common import * 13 | #from .version import * 14 | #from .cli_wrapper import * 15 | #from .extractor import * 16 | else: 17 | # Don't import anything. 
18 | pass 19 | -------------------------------------------------------------------------------- /you-get: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys 3 | 4 | _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) 5 | _filepath = os.path.dirname(sys.argv[0]) 6 | sys.path.insert(1, os.path.join(_filepath, _srcdir)) 7 | 8 | if sys.version_info[0] == 3: 9 | import you_get 10 | if __name__ == '__main__': 11 | you_get.main(repo_path=_filepath) 12 | else: # Python 2 13 | from you_get.util import log 14 | log.e("[fatal] Python 3 is required!") 15 | log.wtf("try to run this script using 'python3 you-get'.") 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # https://travis-ci.org/soimort/you-get 2 | language: python 3 | python: 4 | - "3.2" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | - "nightly" 10 | - "pypy3" 11 | script: make test 12 | sudo: false 13 | notifications: 14 | webhooks: 15 | urls: 16 | - https://webhooks.gitter.im/e/43cd57826e88ed8f2152 17 | on_success: change # options: [always|never|change] default: always 18 | on_failure: always # options: [always|never|change] default: always 19 | on_start: never # options: [always|never|change] default: always 20 | -------------------------------------------------------------------------------- /src/you_get/extractors/khan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['khan_download'] 4 | 5 | from ..common import * 6 | from .youtube import YouTube 7 | 8 | def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_content(url) 10 | youtube_url = re.search(' unicode''' 16 | s = m.group(0)[2:].rstrip(';;') 17 | if s.startswith('x'): 18 | return chr(int('0'+s, 16)) 19 | else: 
20 | return chr(int(s)) 21 | 22 | from .fs import legitimize 23 | 24 | def get_filename(htmlstring): 25 | return legitimize(unescape_html(htmlstring)) 26 | 27 | def parameterize(string): 28 | return "'%s'" % string.replace("'", r"'\''") 29 | -------------------------------------------------------------------------------- /src/you_get/extractors/magisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['magisto_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) 12 | api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) 13 | content = get_html(api_url) 14 | data = json.loads(content) 15 | title1 = data['title'] 16 | title2 = data['creator'] 17 | title = "%s - %s" % (title1, title2) 18 | url = data['video_direct_url'] 19 | type, ext, size = url_info(url) 20 | 21 | print_info(site_info, title, type, size) 22 | if not info_only: 23 | download_urls([url], title, ext, size, output_dir, merge=merge) 24 | 25 | site_info = "Magisto.com" 26 | download = magisto_download 27 | download_playlist = playlist_not_supported('magisto') 28 | -------------------------------------------------------------------------------- /src/you_get/extractors/quanmin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['quanmin_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def quanmin_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | roomid = url.split('/')[3].split('?')[0] 10 | 11 | json_request_url = 'http://m.quanmin.tv/json/rooms/{}/noinfo6.json'.format(roomid) 12 | content = get_html(json_request_url) 13 | data = json.loads(content) 14 | 15 | title = data["title"] 16 | 17 | if not data["play_status"]: 
18 | raise ValueError("The live stream is not online!") 19 | 20 | real_url = data["live"]["ws"]["flv"]["5"]["src"] 21 | 22 | print_info(site_info, title, 'flv', float('inf')) 23 | if not info_only: 24 | download_urls([real_url], title, 'flv', None, output_dir, merge = merge) 25 | 26 | site_info = "quanmin.tv" 27 | download = quanmin_download 28 | download_playlist = playlist_not_supported('quanmin') 29 | -------------------------------------------------------------------------------- /src/you_get/extractors/heavymusic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['heavymusic_download'] 4 | 5 | from ..common import * 6 | 7 | def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url) 9 | tracks = re.findall(r'href="(online2\.php[^"]+)"', html) 10 | for track in tracks: 11 | band = r1(r'band=([^&]*)', track) 12 | album = r1(r'album=([^&]*)', track) 13 | title = r1(r'track=([^&]*)', track) 14 | file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) 15 | _, _, size = url_info(file_url) 16 | 17 | print_info(site_info, title, 'mp3', size) 18 | if not info_only: 19 | download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) 20 | 21 | site_info = "heavy-music.ru" 22 | download = heavymusic_download 23 | download_playlist = heavymusic_download 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/ted.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ted_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' 11 | metadata = json.loads('{' + 
match1(html, patt) + '}') 12 | title = metadata['talks'][0]['title'] 13 | nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] 14 | for quality in ['high', 'medium', 'low']: 15 | if quality in nativeDownloads: 16 | url = nativeDownloads[quality] 17 | type, ext, size = url_info(url) 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge=merge) 21 | break 22 | 23 | site_info = "TED.com" 24 | download = ted_download 25 | download_playlist = playlist_not_supported('ted') 26 | -------------------------------------------------------------------------------- /src/you_get/extractors/giphy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['giphy_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | url = list(set([ 12 | unicodize(str.replace(i, '\\/', '/')) 13 | for i in re.findall(r'', html) 14 | ])) 15 | 16 | title = r1(r'', html) 17 | 18 | if title is None: 19 | title = url[0] 20 | 21 | type, ext, size = url_info(url[0], True) 22 | size = urls_size(url) 23 | 24 | type = "video/mp4" 25 | ext = "mp4" 26 | 27 | print_info(site_info, title, type, size) 28 | if not info_only: 29 | download_urls(url, title, ext, size, output_dir, merge=False) 30 | 31 | site_info = "Giphy.com" 32 | download = giphy_download 33 | download_playlist = playlist_not_supported('giphy') 34 | -------------------------------------------------------------------------------- /src/you_get/extractors/metacafe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['metacafe_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from urllib.parse import unquote 8 | 9 | def metacafe_download(url, output_dir = '.', merge = 
True, info_only = False, **kwargs): 10 | if re.match(r'http://www.metacafe.com/watch/\w+', url): 11 | html =get_content(url) 12 | title = r1(r'>> import you_get" % you_get.version.__version__)') 9 | 10 | test: 11 | $(SETUP) test 12 | 13 | clean: 14 | zenity --question 15 | rm -fr build/ dist/ src/*.egg-info/ 16 | find . | grep __pycache__ | xargs rm -fr 17 | find . | grep .pyc | xargs rm -f 18 | 19 | all: build sdist bdist bdist_egg bdist_wheel 20 | 21 | html: 22 | pandoc README.md > README.html 23 | 24 | rst: 25 | pandoc -s -t rst README.md > README.rst 26 | 27 | build: 28 | $(SETUP) build 29 | 30 | sdist: 31 | $(SETUP) sdist 32 | 33 | bdist: 34 | $(SETUP) bdist 35 | 36 | bdist_egg: 37 | $(SETUP) bdist_egg 38 | 39 | bdist_wheel: 40 | $(SETUP) bdist_wheel 41 | 42 | install: 43 | $(SETUP) install --user --prefix= 44 | 45 | release: 46 | zenity --question 47 | $(SETUP) sdist bdist_wheel upload --sign 48 | -------------------------------------------------------------------------------- /src/you_get/extractors/mixcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mixcloud_download'] 4 | 5 | from ..common import * 6 | 7 | def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url, faker=True) 9 | title = r1(r'(.+)', html) 12 | 13 | if title is None: 14 | title = url 15 | 16 | sd_urls = list(set([ 17 | unicodize(str.replace(i, '\\/', '/')) 18 | for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html) 19 | ])) 20 | hd_urls = list(set([ 21 | unicodize(str.replace(i, '\\/', '/')) 22 | for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html) 23 | ])) 24 | urls = hd_urls if hd_urls else sd_urls 25 | 26 | type, ext, size = url_info(urls[0], True) 27 | size = urls_size(urls) 28 | 29 | print_info(site_info, title, type, size) 30 | if not info_only: 31 | download_urls(urls, title, ext, size, output_dir, merge=False) 32 | 33 | site_info = 
"Facebook.com" 34 | download = facebook_download 35 | download_playlist = playlist_not_supported('facebook') 36 | -------------------------------------------------------------------------------- /src/you_get/extractors/huomaotv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['huomaotv_download'] 4 | 5 | from ..common import * 6 | 7 | 8 | def get_mobile_room_url(room_id): 9 | return 'http://www.huomao.com/mobile/mob_live/%s' % room_id 10 | 11 | 12 | def get_m3u8_url(stream_id): 13 | return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id 14 | 15 | 16 | def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 17 | room_id_pattern = r'huomao.com/(\d+)' 18 | room_id = match1(url, room_id_pattern) 19 | html = get_content(get_mobile_room_url(room_id)) 20 | 21 | stream_id_pattern = r'id="html_stream" value="(\w+)"' 22 | stream_id = match1(html, stream_id_pattern) 23 | 24 | m3u8_url = get_m3u8_url(stream_id) 25 | 26 | title = match1(html, r'([^<]{1,9999})') 27 | 28 | print_info(site_info, title, 'm3u8', float('inf')) 29 | 30 | if not info_only: 31 | download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) 32 | 33 | 34 | site_info = 'huomao.com' 35 | download = huomaotv_download 36 | download_playlist = playlist_not_supported('huomao') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/musicplayon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import json 7 | 8 | class MusicPlayOn(VideoExtractor): 9 | name = "MusicPlayOn" 10 | 11 | stream_types = [ 12 | {'id': '720p HD'}, 13 | {'id': '360p SD'}, 14 | ] 15 | 16 | def prepare(self, **kwargs): 17 | content = get_content(self.url) 18 | 19 | self.title = match1(content, 20 | 
r'setup\[\'title\'\] = "([^"]+)";') 21 | 22 | for s in self.stream_types: 23 | quality = s['id'] 24 | src = match1(content, 25 | r'src: "([^"]+)", "data-res": "%s"' % quality) 26 | if src is not None: 27 | url = 'http://en.musicplayon.com%s' % src 28 | self.streams[quality] = {'url': url} 29 | 30 | def extract(self, **kwargs): 31 | for i in self.streams: 32 | s = self.streams[i] 33 | _, s['container'], s['size'] = url_info(s['url']) 34 | s['src'] = [s['url']] 35 | 36 | site = MusicPlayOn() 37 | download = site.download_by_url 38 | # TBD: implement download_playlist 39 | -------------------------------------------------------------------------------- /src/you_get/util/fs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import platform 4 | 5 | def legitimize(text, os=platform.system()): 6 | """Converts a string to a valid filename. 7 | """ 8 | 9 | # POSIX systems 10 | text = text.translate({ 11 | 0: None, 12 | ord('/'): '-', 13 | ord('|'): '-', 14 | }) 15 | 16 | if os == 'Windows': 17 | # Windows (non-POSIX namespace) 18 | text = text.translate({ 19 | # Reserved in Windows VFAT and NTFS 20 | ord(':'): '-', 21 | ord('*'): '-', 22 | ord('?'): '-', 23 | ord('\\'): '-', 24 | ord('\"'): '\'', 25 | # Reserved in Windows VFAT 26 | ord('+'): '-', 27 | ord('<'): '-', 28 | ord('>'): '-', 29 | ord('['): '(', 30 | ord(']'): ')', 31 | }) 32 | else: 33 | # *nix 34 | if os == 'Darwin': 35 | # Mac OS HFS+ 36 | text = text.translate({ 37 | ord(':'): '-', 38 | }) 39 | 40 | # Remove leading . 
41 | if text.startswith("."): 42 | text = text[1:] 43 | 44 | text = text[:80] # Trim to 82 Unicode characters long 45 | return text 46 | -------------------------------------------------------------------------------- /src/you_get/extractors/interest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from json import loads 5 | 6 | def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 7 | #http://ch.interest.me/zhtv/VOD/View/114789 8 | #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 9 | html = get_content(url) 10 | #get title 11 | title = match1(html, r'', html) 13 | vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) 14 | assert vid 15 | 16 | xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) 17 | 18 | from xml.dom.minidom import parseString 19 | doc = parseString(xml) 20 | tab = doc.getElementsByTagName('related')[0].firstChild 21 | 22 | for video in tab.childNodes: 23 | if re.search(contentid, video.attributes['link'].value): 24 | url = video.attributes['flv'].value 25 | break 26 | 27 | title = video.attributes['title'].value 28 | assert title 29 | 30 | type, ext, size = url_info(url) 31 | print_info(site_info, title, type, size) 32 | 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir, merge = merge) 35 | 36 | site_info = "ehow.com" 37 | download = ehow_download 38 | download_playlist = playlist_not_supported('ehow') 39 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ============================================== 2 | This is a copy of the MIT license. 
3 | ============================================== 4 | Copyright (C) 2012-2017 Mort Yao 5 | Copyright (C) 2012 Boyu Guo 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | this software and associated documentation files (the "Software"), to deal in 9 | the Software without restriction, including without limitation the rights to 10 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | of the Software, and to permit persons to whom the Software is furnished to do 12 | so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /src/you_get/extractors/vine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['vine_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | 9 | def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_content(url) 11 | 12 | video_id = r1(r'vine.co/v/([^/]+)', url) 13 | title = r1(r'([^<]*)', html) 14 | stream = r1(r'', html) 15 | if not stream: # https://vine.co/v/.../card 16 | stream = r1(r'"videoUrl":"([^"]+)"', html) 17 | if stream: 18 | stream = stream.replace('\\/', '/') 19 | else: 20 | posts_url = 'https://archive.vine.co/posts/' + video_id + '.json' 21 | json_data = json.loads(get_content(posts_url)) 22 | stream = json_data['videoDashUrl'] 23 | title = json_data['description'] 24 | if title == "": 25 | title = json_data['username'].replace(" ", "_") + "_" + video_id 26 | 27 | mime, ext, size = url_info(stream) 28 | 29 | print_info(site_info, title, mime, size) 30 | if not info_only: 31 | download_urls([stream], title, ext, size, output_dir, merge=merge) 32 | 33 | 34 | site_info = "Vine.co" 35 | download = vine_download 36 | download_playlist = playlist_not_supported('vine') 37 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.extractors import ( 6 | imgur, 7 | magisto, 8 | youtube, 9 | bilibili, 10 | ) 11 | 12 | 13 | class YouGetTests(unittest.TestCase): 14 | def test_imgur(self): 15 | imgur.download('http://imgur.com/WVLk5nD', info_only=True) 16 | imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True) 17 | 18 | def test_magisto(self): 19 | magisto.download( 20 | 'http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA', 21 | info_only=True 
22 | ) 23 | 24 | def test_youtube(self): 25 | youtube.download( 26 | 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True 27 | ) 28 | youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) 29 | youtube.download( 30 | 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa 31 | info_only=True 32 | ) 33 | 34 | def test_bilibili(self): 35 | bilibili.download( 36 | 'https://www.bilibili.com/video/av16907446/', info_only=True 37 | ) 38 | bilibili.download( 39 | 'https://www.bilibili.com/video/av13228063/', info_only=True 40 | ) 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/vidto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['vidto_download'] 4 | 5 | from ..common import * 6 | import pdb 7 | import time 8 | 9 | 10 | def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 11 | html = get_content(url) 12 | params = {} 13 | r = re.findall( 14 | r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html) 15 | for name, value in r: 16 | params[name] = value 17 | data = parse.urlencode(params).encode('utf-8') 18 | req = request.Request(url) 19 | print("Please wait for 6 seconds...") 20 | time.sleep(6) 21 | print("Starting") 22 | new_html = request.urlopen(req, data).read().decode('utf-8', 'replace') 23 | new_stff = re.search('lnk_download" href="(.*?)">', new_html) 24 | if(new_stff): 25 | url = new_stff.group(1) 26 | title = params['fname'] 27 | type = "" 28 | ext = "" 29 | a, b, size = url_info(url) 30 | print_info(site_info, title, type, size) 31 | if not info_only: 32 | download_urls([url], title, ext, size, output_dir, merge=merge) 33 | else: 34 | print("cannot find link, please review") 35 | pdb.set_trace() 36 | 37 | 38 | site_info = "vidto.me" 39 | download = vidto_download 
40 | download_playlist = playlist_not_supported('vidto') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/baomihua.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['baomihua_download', 'baomihua_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | import urllib 8 | 9 | def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id) 11 | host = r1(r'host=([^&]*)', html) 12 | assert host 13 | type = r1(r'videofiletype=([^&]*)', html) 14 | assert type 15 | vid = r1(r'&stream_name=([^&]*)', html) 16 | assert vid 17 | dir_str = r1(r'&dir=([^&]*)', html).strip() 18 | url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type) 19 | _, ext, size = url_info(url) 20 | print_info(site_info, title, type, size) 21 | if not info_only: 22 | download_urls([url], title, ext, size, output_dir, merge = merge) 23 | 24 | def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 25 | html = get_html(url) 26 | title = r1(r'(.*)', html) 27 | assert title 28 | id = r1(r'flvid\s*=\s*(\d+)', html) 29 | assert id 30 | baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) 31 | 32 | site_info = "baomihua.com" 33 | download = baomihua_download 34 | download_playlist = playlist_not_supported('baomihua') 35 | -------------------------------------------------------------------------------- /src/you_get/extractors/dailymotion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['dailymotion_download'] 4 | 5 | from ..common import * 6 | import urllib.parse 7 | 8 | def rebuilt_url(url): 9 | path = urllib.parse.urlparse(url).path 10 | aid = path.split('/')[-1].split('_')[0] 11 | return 
'http://www.dailymotion.com/embed/video/{}?autoplay=1'.format(aid) 12 | 13 | def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 14 | """Downloads Dailymotion videos by URL. 15 | """ 16 | 17 | html = get_content(rebuilt_url(url)) 18 | info = json.loads(match1(html, r'qualities":({.+?}),"')) 19 | title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ 20 | match1(html, r'"title"\s*:\s*"([^"]+)"') 21 | title = unicodize(title) 22 | 23 | for quality in ['1080','720','480','380','240','144','auto']: 24 | try: 25 | real_url = info[quality][1]["url"] 26 | if real_url: 27 | break 28 | except KeyError: 29 | pass 30 | 31 | mime, ext, size = url_info(real_url) 32 | 33 | print_info(site_info, title, mime, size) 34 | if not info_only: 35 | download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge) 36 | 37 | site_info = "Dailymotion.com" 38 | download = dailymotion_download 39 | download_playlist = playlist_not_supported('dailymotion') 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | PROJ_NAME = 'you-get' 4 | PACKAGE_NAME = 'you_get' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import os, json, imp 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) 11 | try: 12 | README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() 13 | except: 14 | README = "" 15 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() 16 | VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ 17 | 18 | from setuptools import setup, find_packages 19 | setup( 20 | name = proj_info['name'], 21 | version = VERSION, 22 | 23 | author = proj_info['author'], 24 | author_email = 
proj_info['author_email'], 25 | url = proj_info['url'], 26 | license = proj_info['license'], 27 | 28 | description = proj_info['description'], 29 | keywords = proj_info['keywords'], 30 | 31 | long_description = README, 32 | 33 | packages = find_packages('src'), 34 | package_dir = {'' : 'src'}, 35 | 36 | test_suite = 'tests', 37 | 38 | platforms = 'any', 39 | zip_safe = True, 40 | include_package_data = True, 41 | 42 | classifiers = proj_info['classifiers'], 43 | 44 | entry_points = {'console_scripts': proj_info['console_scripts']} 45 | ) 46 | -------------------------------------------------------------------------------- /you-get.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "you-get", 3 | "author": "Mort Yao", 4 | "author_email": "mort.yao@gmail.com", 5 | "url": "https://you-get.org/", 6 | "license": "MIT", 7 | 8 | "description": "Dumb downloader that scrapes the web", 9 | "keywords": "video download youtube youku niconico", 10 | 11 | "classifiers": [ 12 | "Development Status :: 4 - Beta", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: End Users/Desktop", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.0", 22 | "Programming Language :: Python :: 3.1", 23 | "Programming Language :: Python :: 3.2", 24 | "Programming Language :: Python :: 3.3", 25 | "Programming Language :: Python :: 3.4", 26 | "Programming Language :: Python :: 3.5", 27 | "Programming Language :: Python :: 3.6", 28 | "Topic :: Internet", 29 | "Topic :: Internet :: WWW/HTTP", 30 | "Topic :: Multimedia", 31 | "Topic :: Multimedia :: Graphics", 32 | "Topic :: Multimedia :: Sound/Audio", 33 | "Topic :: Multimedia :: Video", 34 | "Topic :: Utilities" 35 | ], 36 | 37 | 
"console_scripts": [ 38 | "you-get = you_get.__main__:main" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/yizhibo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['yizhibo_download'] 4 | 5 | from ..common import * 6 | import json 7 | import time 8 | 9 | def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | video_id = url[url.rfind('/')+1:].split(".")[0] 11 | json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) 12 | content = get_content(json_request_url) 13 | error = json.loads(content)['result'] 14 | if (error != 1): 15 | raise ValueError("Error : {}".format(error)) 16 | 17 | data = json.loads(content) 18 | title = data.get('data')['live_title'] 19 | if (title == ''): 20 | title = data.get('data')['nickname'] 21 | m3u8_url = data.get('data')['play_url'] 22 | m3u8 = get_content(m3u8_url) 23 | base_url = "/".join(data.get('data')['play_url'].split("/")[:7])+"/" 24 | part_url = re.findall(r'([0-9]+\.ts)', m3u8) 25 | real_url = [] 26 | for i in part_url: 27 | url = base_url + i 28 | real_url.append(url) 29 | print_info(site_info, title, 'ts', float('inf')) 30 | if not info_only: 31 | if player: 32 | launch_player(player, [m3u8_url]) 33 | download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge) 34 | 35 | site_info = "yizhibo.com" 36 | download = yizhibo_download 37 | download_playlist = playlist_not_supported('yizhibo') 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | `you-get` is currently experimenting with an aggressive approach to handling issues. Namely, a bug report must be addressed with some code via a pull request. 
4 | 5 | ## Report a broken extractor 6 | 7 | **How-To:** Please open a new pull request with the following changes: 8 | 9 | * Add a new test case in [tests/test.py](https://github.com/soimort/you-get/blob/develop/tests/test.py), with the failing URL(s). 10 | 11 | The Travis CI build will (ideally) fail showing a :x:, which means you have successfully reported a broken extractor. 12 | 13 | Such a valid PR will be either *closed* if it's fixed by another PR, or *merged* if it's fixed by follow-up commits from the reporters themselves. 14 | 15 | ## Report other issues / Suggest a new feature 16 | 17 | **How-To:** Please open a pull request with the proposed changes directly. 18 | 19 | A valid PR need not be complete (i.e., it can be a work in progress), but it should contain at least one sensible, nontrivial commit. 20 | 21 | ## Hints 22 | 23 | * The [`develop`](https://github.com/soimort/you-get/tree/develop) branch is where your pull request goes. 24 | * Remember to rebase. 25 | * Document your PR clearly, and if applicable, provide some sample links for reviewers to test with. 26 | * Write well-formatted, easy-to-understand commit messages. If you don't know how, look at existing ones. 27 | * We will not ask you to sign a CLA, but you must ensure that your code can be legally redistributed (under the terms of the MIT license). 
28 | -------------------------------------------------------------------------------- /src/you_get/extractors/naver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 10 | 11 | __all__ = ['naver_download_by_url'] 12 | 13 | 14 | def naver_download_by_url(url, info_only=False, **kwargs): 15 | ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' 16 | page = get_content(url) 17 | og_video_url = re.search(r"", page).group(1) 18 | params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) 19 | vid = params_dict['vid'][0] 20 | key = params_dict['outKey'][0] 21 | meta_str = get_content(ep.format(vid, key)) 22 | meta_json = json.loads(meta_str) 23 | if 'errorCode' in meta_json: 24 | log.wtf(meta_json['errorCode']) 25 | title = meta_json['meta']['subject'] 26 | videos = meta_json['videos']['list'] 27 | video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) 28 | video_url = video_list[-1]['source'] 29 | # size = video_list[-1]['size'] 30 | # result wrong size 31 | size = url_size(video_url) 32 | print_info(site_info, title, 'mp4', size) 33 | if not info_only: 34 | download_urls([video_url], title, 'mp4', size, **kwargs) 35 | 36 | site_info = "naver.com" 37 | download = naver_download_by_url 38 | download_playlist = playlist_not_supported('naver') 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuaishou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, 
playlist_not_supported, url_size 10 | 11 | __all__ = ['kuaishou_download_by_url'] 12 | 13 | 14 | def kuaishou_download_by_url(url, info_only=False, **kwargs): 15 | page = get_content(url) 16 | # size = video_list[-1]['size'] 17 | # result wrong size 18 | try: 19 | og_video_url = re.search(r"", page).group(1) 20 | video_url = og_video_url 21 | title = url.split('/')[-1] 22 | size = url_size(video_url) 23 | video_format = video_url.split('.')[-1] 24 | print_info(site_info, title, video_format, size) 25 | if not info_only: 26 | download_urls([video_url], title, video_format, size, **kwargs) 27 | except:# extract image 28 | og_image_url = re.search(r"", page).group(1) 29 | image_url = og_image_url 30 | title = url.split('/')[-1] 31 | size = url_size(image_url) 32 | image_format = image_url.split('.')[-1] 33 | print_info(site_info, title, image_format, size) 34 | if not info_only: 35 | download_urls([image_url], title, image_format, size, **kwargs) 36 | 37 | site_info = "kuaishou.com" 38 | download = kuaishou_download_by_url 39 | download_playlist = playlist_not_supported('kuaishou') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/suntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['suntv_download'] 4 | 5 | from ..common import * 6 | import urllib 7 | import re 8 | 9 | def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.isuntv.com/\w+', url): 11 | API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" 12 | 13 | itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') 14 | values = {"itemid" : itemid, "vodid": ""} 15 | 16 | data = str(values).replace("'", '"') 17 | data = data.encode('utf-8') 18 | req = urllib.request.Request(API_URL, data) 19 | req.add_header('AjaxPro-Method', 'ToPlay') #important! 
20 | resp = urllib.request.urlopen(req) 21 | respData = resp.read() 22 | respData = respData.decode('ascii').strip('"') #Ahhhhhhh! 23 | 24 | video_url = 'http://www.isuntv.com' + str(respData) 25 | 26 | html = get_content(url, decoded=False) 27 | html = html.decode('gbk') 28 | title = match1(html, '([^<]+)').strip() #get rid of \r\n s 29 | 30 | type_ = '' 31 | size = 0 32 | type, ext, size = url_info(video_url) 33 | 34 | print_info(site_info, title, type, size) 35 | if not info_only: 36 | download_urls([url], title, 'mp4', size, output_dir, merge=merge) 37 | 38 | site_info = "SunTV" 39 | download = suntv_download 40 | download_playlist = playlist_not_supported('suntv') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/iwara.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | __all__ = ['iwara_download'] 3 | from ..common import * 4 | headers = { 5 | 'DNT': '1', 6 | 'Accept-Encoding': 'gzip, deflate, sdch, br', 7 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 8 | 'Upgrade-Insecure-Requests': '1', 9 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36', 10 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 11 | 'Cache-Control': 'max-age=0', 12 | 13 | 'Connection': 'keep-alive', 14 | 'Save-Data': 'on', 15 | 'Cookie':'has_js=1;show_adult=1', 16 | } 17 | 18 | def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 19 | global headers 20 | video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)') 21 | video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+') 22 | html = get_content(url,headers=headers) 23 | title = r1(r'<title>(.*)', html) 24 | api_url=video_url+'/api/video/'+video_hash 25 | content=get_content(api_url,headers=headers) 26 | data=json.loads(content) 27 | 
type,ext,size=url_info(data[0]['uri'], headers=headers) 28 | down_urls=data[0]['uri'] 29 | print_info(down_urls,title+data[0]['resolution'],type,size) 30 | 31 | if not info_only: 32 | download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers) 33 | 34 | site_info = "iwara" 35 | download = iwara_download 36 | download_playlist = playlist_not_supported('iwara') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuwo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['kuwo_download'] 4 | 5 | from ..common import * 6 | import re 7 | 8 | def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False): 9 | html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid) 10 | title=match1(html,r"(.*)") 11 | #to get title 12 | #format =aac|mp3 ->to get aac format=mp3 ->to get mp3 13 | url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid) 14 | songtype, ext, size = url_info(url) 15 | print_info(site_info, title, songtype, size) 16 | if not info_only: 17 | download_urls([url], title, ext, size, output_dir) 18 | 19 | def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | html=get_content(url) 21 | matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated 22 | for rid in matched: 23 | kuwo_download_by_rid(rid,output_dir,merge,info_only) 24 | 25 | 26 | 27 | def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if "www.kuwo.cn/yinyue" in url: 29 | rid=match1(url,'yinyue/(\d+)') 30 | kuwo_download_by_rid(rid,output_dir, merge, info_only) 31 | else: 32 | kuwo_playlist_download(url,output_dir,merge,info_only) 33 | 34 | site_info = "kuwo.cn" 35 | download = kuwo_download 36 | # download_playlist = playlist_not_supported("kugou") 37 | 
# download_playlist=playlist_not_supported("kuwo") 38 | download_playlist=kuwo_playlist_download 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/veoh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['veoh_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | 8 | def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 9 | '''Get item_id''' 10 | if re.match(r'http://www.veoh.com/watch/\w+', url): 11 | item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') 12 | elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): 13 | item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') 14 | else: 15 | raise NotImplementedError('Cannot find item ID') 16 | veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs) 17 | 18 | #---------------------------------------------------------------------- 19 | def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): 20 | """Source: Android mobile""" 21 | webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) 22 | 23 | #grab download URL 24 | a = get_content(webpage_url, decoded=True) 25 | url = match1(a, r'(?:)?', xml) 21 | urls = re.findall(r']*>(?:)?', xml) 22 | hostpath = r1(r']*>(?:)?', xml) 23 | 24 | return name, urls, hostpath 25 | 26 | def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 27 | channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) 28 | program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) 29 | volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) 30 | 31 | title, urls, hostpath = video_info(channel_id, program_id, volumn_id) 32 | urls = [hostpath + url for url in urls] 33 | 34 | size = 0 35 | for url in urls: 36 | _, ext, temp = 
url_info(url) 37 | size += temp 38 | 39 | print_info(site_info, title, ext, size) 40 | if not info_only: 41 | download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) 42 | 43 | site_info = "Joy.cn" 44 | download = joy_download 45 | download_playlist = playlist_not_supported('joy') 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure these boxes are checked before submitting your issue – thank you! 2 | 3 | - [ ] You can actually watch the video in your browser or mobile application, but not download them with `you-get`. 4 | - [ ] Your `you-get` is up-to-date. 5 | - [ ] I have read and tried to do so. 6 | - [ ] The issue is not yet reported on or . If so, please add your comments under the existing issue. 7 | - [ ] The issue (or question) is really about `you-get`, not about some other code or project. 8 | 9 | Run the command with the `--debug` option, and paste the full output inside the fences: 10 | 11 | ``` 12 | [PASTE IN ME] 13 | ``` 14 | 15 | If there's anything else you would like to say (e.g. in case your issue is not about downloading a specific video; it might as well be a general discussion or proposal for a new feature), fill in the box below; otherwise, you may want to post an emoji or meme instead: 16 | 17 | > [WRITE SOMETHING] 18 | > [OR HAVE SOME :icecream:!] 19 | 20 | 汉语翻译最终日期:2016年02月26日 21 | 22 | 在提交前,请确保您已经检查了以下内容! 23 | 24 | - [ ] 你可以在浏览器或移动端中观看视频,但不能使用`you-get`下载. 25 | - [ ] 您的`you-get`为最新版. 26 | - [ ] 我已经阅读并按 中的指引进行了操作. 27 | - [ ] 您的问题没有在 , 报告,否则请在原有issue下报告. 28 | - [ ] 本问题确实关于`you-get`, 而不是其他项目. 29 | 30 | 请使用`--debug`运行,并将输出粘贴在下面: 31 | 32 | ``` 33 | [在这里粘贴完整日志] 34 | ``` 35 | 36 | 如果您有其他附言,例如问题只在某个视频发生,或者是一般性讨论或者提出新功能,请在下面添加;或者您可以卖个萌: 37 | 38 | > [您的内容] 39 | > [舔 :icecream:!] 
40 | -------------------------------------------------------------------------------- /src/you_get/extractors/panda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['panda_download'] 4 | 5 | from ..common import * 6 | from ..util.log import * 7 | import json 8 | import time 9 | 10 | def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 11 | roomid = re.search('/(\d+)', url) 12 | if roomid is None: 13 | log.wtf('Cannot found room id for this url') 14 | roomid = roomid.group(1) 15 | json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time())) 16 | content = get_html(json_request_url) 17 | api_json = json.loads(content) 18 | 19 | errno = api_json["errno"] 20 | errmsg = api_json["errmsg"] 21 | if errno: 22 | raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg)) 23 | data = api_json["data"] 24 | title = data["roominfo"]["name"] 25 | room_key = data["videoinfo"]["room_key"] 26 | plflag = data["videoinfo"]["plflag"].split("_") 27 | status = data["videoinfo"]["status"] 28 | if status is not "2": 29 | raise ValueError("The live stream is not online! 
(status:%s)" % status) 30 | 31 | data2 = json.loads(data["videoinfo"]["plflag_list"]) 32 | rid = data2["auth"]["rid"] 33 | sign = data2["auth"]["sign"] 34 | ts = data2["auth"]["time"] 35 | real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid) 36 | 37 | print_info(site_info, title, 'flv', float('inf')) 38 | if not info_only: 39 | download_urls([real_url], title, 'flv', None, output_dir, merge = merge) 40 | 41 | site_info = "panda.tv" 42 | download = panda_download 43 | download_playlist = playlist_not_supported('panda') 44 | -------------------------------------------------------------------------------- /src/you_get/extractors/videomega.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['videomega_download'] 4 | 5 | from ..common import * 6 | import ssl 7 | 8 | def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | # Hot-plug cookie handler 10 | ssl_context = request.HTTPSHandler( 11 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 12 | cookie_handler = request.HTTPCookieProcessor() 13 | opener = request.build_opener(ssl_context, cookie_handler) 14 | opener.addheaders = [('Referer', url), 15 | ('Cookie', 'noadvtday=0')] 16 | request.install_opener(opener) 17 | 18 | if re.search(r'view\.php', url): 19 | php_url = url 20 | else: 21 | content = get_content(url) 22 | m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content) 23 | ref = m.group(1) 24 | width, height = m.group(2), m.group(3) 25 | php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height) 26 | content = get_content(php_url) 27 | 28 | title = match1(content, r'(.*)') 29 | js = match1(content, r'(eval.*)') 30 | t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)') 31 | t = re.sub(r'(\w)', r'{\1}', t) 32 | t = t.translate({87 + i: str(i) for i in range(10, 36)}) 33 | s = match1(js, 
r"'([^']+)'\.split").split('|') 34 | src = t.format(*s) 35 | 36 | type, ext, size = url_info(src, faker=True) 37 | 38 | print_info(site_info, title, type, size) 39 | if not info_only: 40 | download_urls([src], title, ext, size, output_dir, merge=merge, faker=True) 41 | 42 | site_info = "Videomega.tv" 43 | download = videomega_download 44 | download_playlist = playlist_not_supported('videomega') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/qingting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from ..common import get_content, playlist_not_supported, url_size 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | 8 | __all__ = ['qingting_download_by_url'] 9 | 10 | 11 | class Qingting(VideoExtractor): 12 | # every resource is described by its channel id and program id 13 | # so vid is tuple (chaanel_id, program_id) 14 | 15 | name = 'Qingting' 16 | stream_types = [ 17 | {'id': '_default'} 18 | ] 19 | 20 | ep = 'http://i.qingting.fm/wapi/channels/{}/programs/{}' 21 | file_host = 'http://od.qingting.fm/{}' 22 | mobile_pt = r'channels\/(\d+)\/programs/(\d+)' 23 | 24 | def prepare(self, **kwargs): 25 | if self.vid is None: 26 | hit = re.search(self.__class__.mobile_pt, self.url) 27 | self.vid = (hit.group(1), hit.group(2)) 28 | 29 | ep_url = self.__class__.ep.format(self.vid[0], self.vid[1]) 30 | meta = json.loads(get_content(ep_url)) 31 | 32 | if meta['code'] != 0: 33 | log.wtf(meta['message']['errormsg']) 34 | 35 | file_path = self.__class__.file_host.format(meta['data']['file_path']) 36 | self.title = meta['data']['name'] 37 | duration = str(meta['data']['duration']) + 's' 38 | 39 | self.streams['_default'] = {'src': [file_path], 'video_profile': duration, 'container': 'm4a'} 40 | 41 | def extract(self, **kwargs): 42 | self.streams['_default']['size'] = url_size(self.streams['_default']['src'][0]) 43 | 44 | 45 
| def qingting_download_by_url(url, **kwargs): 46 | Qingting().download_by_url(url, **kwargs) 47 | 48 | site_info = 'Qingting' 49 | download = qingting_download_by_url 50 | download_playlist = playlist_not_supported('Qingting') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/soundcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['soundcloud_download', 'soundcloud_download_by_id'] 4 | 5 | from ..common import * 6 | import json 7 | import urllib.error 8 | 9 | client_id = 'WKcQQdEZw7Oi01KqtHWxeVSxNyRzgT8M' 10 | 11 | def soundcloud_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): 12 | assert title 13 | url = 'https://api.soundcloud.com/tracks/{}/{}?client_id={}'.format(id, 'stream', client_id) 14 | 15 | type, ext, size = url_info(url) 16 | 17 | print_info(site_info, title, type, size) 18 | 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge = merge) 21 | 22 | def soundcloud_i1_api(track_id): 23 | url = 'https://api.soundcloud.com/i1/tracks/{}/streams?client_id={}'.format(track_id, client_id) 24 | return json.loads(get_content(url))['http_mp3_128_url'] 25 | 26 | def soundcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 27 | url = 'https://api.soundcloud.com/resolve.json?url={}&client_id={}'.format(url, client_id) 28 | metadata = get_content(url) 29 | info = json.loads(metadata) 30 | title = info["title"] 31 | real_url = info.get('download_url') 32 | if real_url is None: 33 | real_url = info.get('steram_url') 34 | if real_url is None: 35 | raise Exception('Cannot get media URI for {}'.format(url)) 36 | real_url = soundcloud_i1_api(info['id']) 37 | mime, ext, size = url_info(real_url) 38 | print_info(site_info, title, mime, size) 39 | if not info_only: 40 | download_urls([real_url], title, ext, size, output_dir, merge=merge) 41 | 42 | 
site_info = "SoundCloud.com" 43 | download = soundcloud_download 44 | download_playlist = playlist_not_supported('soundcloud') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/mtv81.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mtv81_download'] 4 | 5 | from ..common import * 6 | 7 | from xml.dom.minidom import parseString 8 | 9 | from html.parser import HTMLParser 10 | 11 | 12 | def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | html = get_content(url) 14 | title = HTMLParser().unescape( 15 | "|".join(match1(html, r"(.*?)").split("|")[:-2])) 16 | 17 | # mgid%3Auma%3Avideo%3Amtv81.com%3A897974 18 | vid = match1(html, r'getTheVideo\("(.*?)"') 19 | xml = parseString( 20 | get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid))) 21 | 22 | url = sorted( 23 | map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")), 24 | key=lambda x: int(match1(x, r'_(\d+?)_')))[-1] 25 | 26 | mediatype, ext, size = 'mp4', 'mp4', 0 27 | print_info(site_info, title, mediatype, size) 28 | # 29 | # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf 30 | # 31 | # because rtmpdump is unstable,may try serveral times 32 | # 33 | if not info_only: 34 | # import pdb 35 | # pdb.set_trace() 36 | download_rtmp_url(url=url, title=title, ext=ext, params={ 37 | "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir) 38 | 39 | 40 | site_info = "mtv81.com" 41 | download = mtv81_download 42 | download_playlist = playlist_not_supported('mtv81') 43 | 
-------------------------------------------------------------------------------- /src/you_get/extractors/qq_egame.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from ..common import get_content 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | from ..util.strings import unescape_html 8 | 9 | __all__ = ['qq_egame_download'] 10 | 11 | 12 | class QQEgame(VideoExtractor): 13 | stream_types = [ 14 | {'id': 'original', 'video_profile': '0', 'container': 'flv'}, 15 | {'id': '900', 'video_profile': '900kb/s', 'container': 'flv'}, 16 | {'id': '550', 'video_profile': '550kb/s', 'container': 'flv'} 17 | ] 18 | name = 'QQEgame' 19 | 20 | def prepare(self, **kwargs): 21 | page = get_content(self.url) 22 | server_data = re.search(r'serverData\s*=\s*({.+?});', page) 23 | if server_data is None: 24 | log.wtf('cannot find server_data') 25 | json_data = json.loads(server_data.group(1)) 26 | live_info = json_data['liveInfo']['data'] 27 | self.title = '{}_{}'.format(live_info['profileInfo']['nickName'], live_info['videoInfo']['title']) 28 | for exsited_stream in live_info['videoInfo']['streamInfos']: 29 | for s in self.__class__.stream_types: 30 | if re.search(r'(\d+)', s['video_profile']).group(1) == exsited_stream['bitrate']: 31 | current_stream_id = s['id'] 32 | stream_info = dict(src=[unescape_html(exsited_stream['playUrl'])]) 33 | stream_info['video_profile'] = exsited_stream['desc'] 34 | stream_info['container'] = s['container'] 35 | stream_info['size'] = float('inf') 36 | self.streams[current_stream_id] = stream_info 37 | 38 | 39 | def qq_egame_download(url, **kwargs): 40 | QQEgame().download_by_url(url, **kwargs) 41 | # url dispatching has been done in qq.py 42 | -------------------------------------------------------------------------------- /src/you_get/extractors/pinterest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
#!/usr/bin/env python

import os.path
import shlex
import subprocess

def get_usable_rtmpdump(cmd):
    """Return *cmd* if it can be launched as a process, else None.

    Used to probe whether an rtmpdump binary is available on PATH.
    """
    try:
        p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.communicate()
        return cmd
    except OSError:
        # Binary missing or not executable; anything else should propagate.
        return None

RTMPDUMP = get_usable_rtmpdump('rtmpdump')

def has_rtmpdump_installed():
    """True if an rtmpdump binary was found at import time."""
    return RTMPDUMP is not None

# params = {"-y": "playlist", "-q": None}
# If the entry is a bare flag, its value should be None.
# -r and -o must not be included in params.

def download_rtmpdump_stream(url, title, ext, params=None, output_dir='.'):
    """Download an RTMP stream to '<output_dir>/<title>.<ext>' via rtmpdump.

    params maps extra rtmpdump options to values (None for bare flags).
    """
    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)

    cmdline = [RTMPDUMP, '-r', url, '-o', filepath]

    # NOTE: params defaults to None (not {}) to avoid the shared mutable
    # default-argument pitfall.
    for key, value in (params or {}).items():
        cmdline.append(key)
        if value is not None:
            cmdline.append(value)

    print("Call rtmpdump:\n"+" ".join(cmdline)+"\n")
    subprocess.call(cmdline)
    return

#
def play_rtmpdump_stream(player, url, params=None):
    """Pipe an RTMP stream from rtmpdump straight into *player*'s stdin.

    BUGFIX: the previous implementation appended a literal '|' to a single
    argv list; subprocess does not interpret shell syntax, so rtmpdump
    received '|', the player name and '-' as bogus arguments and no pipe was
    ever created.  The two processes are now chained explicitly.
    """
    cmdline = [RTMPDUMP, '-r', url]

    for key, value in (params or {}).items():
        cmdline.append(key)
        if value is not None:
            cmdline.append(value)

    # Dump to stdout so the player can read from it.
    cmdline.append('-o')
    cmdline.append('-')

    # logging
    print("Call rtmpdump:\n"+" ".join(cmdline + ['|', player, '-'])+"\n")

    # player may carry its own options (e.g. "mplayer -fs"); split it
    # shell-style before appending '-' (read from stdin).
    player_cmdline = shlex.split(player) + ['-']

    dumper = subprocess.Popen(cmdline, stdout=subprocess.PIPE)
    try:
        subprocess.call(player_cmdline, stdin=dumper.stdout)
    finally:
        if dumper.stdout is not None:
            dumper.stdout.close()  # let rtmpdump see SIGPIPE if player exits
        dumper.wait()
    return
#!/usr/bin/env python

__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']

from ..common import *

def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Resolve video *vid* through the insite API and download its best-quality URL."""
    video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
    models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
    # Highest qualityLevel last; keep sorted()[-1] (stable) rather than max().
    best = sorted(models, key=lambda m: m['qualityLevel'])[-1]
    url = best['videoUrl']
    type = ext = r1(r'\.(flv|mp4)', url)
    _, _, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)

def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Handle a single-video page; fall back to the playlist handler when no id is found."""
    vid = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url) or \
        r1(r'http://\w+.yinyuetai.com/video/h5/(\d+)', url)
    if not vid:
        yinyuetai_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    html = get_html(url, 'utf-8')
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html)
    assert title
    title = escape_file_path(parse.unquote(title))
    yinyuetai_download_by_id(vid, title, output_dir, merge=merge, info_only=info_only)

def yinyuetai_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every video referenced by data-id attributes on a playlist page."""
    playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url)
    html = get_html(url)
    for data_id in re.findall(r'data-index="\d+"\s*data-id=(\d+)', html):
        yinyuetai_download('http://v.yinyuetai.com/video/' + data_id,
                           output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "YinYueTai.com"
download = yinyuetai_download
download_playlist = yinyuetai_download_playlist
#!/usr/bin/env python

__all__ = ['fantasy_download']

from ..common import *
import json
import random
from urllib.parse import urlparse, parse_qs


def fantasy_download_by_id_channelId(id = 0, channelId = 0, output_dir = '.', merge = True, info_only = False,
                                     **kwargs):
    """Query the playDetails API for (id, channelId) and download the returned video."""
    api_url = 'http://www.fantasy.tv/tv/playDetails.action?' \
              'myChannelId=1&id={id}&channelId={channelId}&t={t}'.format(id = id,
                                                                         channelId = channelId,
                                                                         t = str(random.random())
                                                                         )
    data = json.loads(get_content(api_url))

    if int(data['status']) != 100000:
        raise Exception('API error!')

    title = data['data']['tv']['title']
    video_url = data['data']['tv']['videoPath']

    # The CDN checks Referer; reuse the API URL as the referer header.
    headers = fake_headers.copy()
    headers['Referer'] = api_url
    type, ext, size = url_info(video_url, headers=headers)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir, merge = merge, headers = headers)


def fantasy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Entry point: pull tvId/channelId out of the page URL's query string."""
    if 'fantasy.tv' not in url:
        raise Exception('Wrong place!')

    query = parse_qs(urlparse(url).query)

    if 'tvId' not in query or 'channelId' not in query:
        raise Exception('No enough arguments!')

    fantasy_download_by_id_channelId(id = query['tvId'][0], channelId = query['channelId'][0],
                                     output_dir = output_dir, merge = merge,
                                     info_only = info_only, **kwargs)


site_info = "fantasy.tv"
download = fantasy_download
download_playlist = playlist_not_supported('fantasy.tv')
#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

import ssl

class Infoq(VideoExtractor):
    # Extractor for InfoQ presentation pages; the video stream, the mp3
    # audio and the PDF slides are exposed as three separate "streams".
    name = "InfoQ"

    stream_types = [
        {'id': 'video'},
        {'id': 'audio'},
        {'id': 'slides'}
    ]

    def prepare(self, **kwargs):
        """Scrape the page and register up to three streams (video/audio/slides).

        Side effect: installs a process-global urllib opener carrying the
        CloudFront signed-cookie values so later url_info()/download calls
        can fetch the media.
        """
        content = get_content(self.url)
        self.title = match1(content, r'<title>([^<]+)')
        # P.s holds the raw video URL; InfoQConstants.sc* are the three
        # CloudFront signed-cookie parts (policy / signature / key-pair id).
        s = match1(content, r'P\.s\s*=\s*\'([^\']+)\'')
        scp = match1(content, r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'')
        scs = match1(content, r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'')
        sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'')

        mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"')
        if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3

        pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"')
        if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf

        # cookie handler
        # NOTE(review): pins the connection to TLSv1 explicitly — presumably
        # required by the media CDN when this was written; confirm before
        # modernizing.
        ssl_context = request.HTTPSHandler(
            context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
        cookie_handler = request.HTTPCookieProcessor()
        opener = request.build_opener(ssl_context, cookie_handler)
        opener.addheaders = [
            ('Referer', self.url),
            ('Cookie',
             'CloudFront-Policy=%s;CloudFront-Signature=%s;CloudFront-Key-Pair-Id=%s' % (scp, scs, sck))
        ]
        # Global side effect: this opener is used by all subsequent urllib
        # requests in the process.
        request.install_opener(opener)

        if s: self.streams['video'] = {'url': s }
        if mp3: self.streams['audio'] = { 'url': mp3 }
        if pdf: self.streams['slides'] = { 'url': pdf }

    def extract(self, **kwargs):
        """Fill in container, size and src for every stream found by prepare()."""
        for i in self.streams:
            s = self.streams[i]
            _, s['container'], s['size'] = url_info(s['url'])
            s['src'] = [s['url']]

site = Infoq()
download = site.download_by_url
download_playlist = site.download_by_url
#!/usr/bin/env python

__all__ = ['nanagogo_download']

from ..common import *
from .universal import *

def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every image and HQ movie attached to a 7gogo.jp talk post."""
    if re.match(r'https?://stat.7gogo.jp', url):
        # Direct static-asset URL: hand off to the universal extractor.
        universal_download(url, output_dir, merge=merge, info_only=info_only)
        return

    talk_id = r1(r'7gogo.jp/([^/]+)/', url)
    post_id = r1(r'7gogo.jp/[^/]+/(\d+)', url)
    title = '%s_%s' % (talk_id, post_id)
    api_url = 'https://api.7gogo.jp/web/v2/talks/%s/posts/%s' % (talk_id, post_id)
    info = json.loads(get_content(api_url))

    post = info['data']['posts']['post']
    if post is None:
        return
    if post['body'] is None:
        return

    items = []
    for entry in post['body']:
        if 'image' in entry:
            image_url = entry['image']
            if image_url[:2] == '//':
                continue  # skip stamp images
            _, ext, size = url_info(image_url)
            items.append({'title': title,
                          'url': image_url,
                          'ext': ext,
                          'size': size})
        elif 'movieUrlHq' in entry:
            movie_url = entry['movieUrlHq']
            _, ext, size = url_info(movie_url)
            items.append({'title': title,
                          'url': movie_url,
                          'ext': ext,
                          'size': size})

    size = sum(item['size'] for item in items)
    if size == 0:
        return  # do not fail the whole process
    print_info(site_info, title, ext, size)
    if not info_only:
        for item in items:
            print_info(site_info, item['title'], item['ext'], item['size'])
            download_urls([item['url']], item['title'], item['ext'], item['size'],
                          output_dir=output_dir,
                          merge=merge)

site_info = "7gogo.jp"
download = nanagogo_download
download_playlist = playlist_not_supported('nanagogo')
def sina_xml_to_url_list(xml_data):
    """str -> list

    Collect the text of every <durl>/<url> element in *xml_data*.
    From Biligrab.
    """
    dom = parseString(xml_data)
    return [
        durl.getElementsByTagName('url')[0].childNodes[0].data
        for durl in dom.getElementsByTagName('durl')
    ]
#!/usr/bin/env python

__all__ = ['kugou_download']

from ..common import *
from json import loads
from base64 import b64decode
import re
import hashlib


def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
    """Download a song from 5sing.kugou.com, or treat any other kugou URL as a playlist."""
    if '5sing' in url.lower():
        # for 5sing.kugou.com
        html = get_html(url)
        ticket = r1(r'"ticket":\s*"(.*)"', html)
        # The ticket is base64-encoded JSON carrying the file URL and song name.
        j = loads(str(b64decode(ticket), encoding="utf-8"))
        url = j['file']
        title = j['songName']
        songtype, ext, size = url_info(url)
        print_info(site_info, title, songtype, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)
    else:
        # for the www.kugou.com/
        return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)


def kugou_download_by_hash(title, hash_val, output_dir='.', merge=True, info_only=False):
    """Resolve a kugou track hash to a direct URL and download it.

    Sample page: http://www.kugou.com/yy/album/single/536957.html
    Key derivation (recovered from the site's swf): key = md5(hash + "kgcloud").
    cmd=4 requests mp3, cmd=3 would request m4a.
    """
    key = hashlib.md5((hash_val + "kgcloud").encode("utf-8")).hexdigest()
    html = get_html("http://trackercdn.kugou.com/i/?pid=6&key=%s&acceptMp3=1&cmd=4&hash=%s" % (key, hash_val))
    j = loads(html)
    url = j['url']
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)


def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every (title, hash) pair scraped from an album/playlist page."""
    html = get_html(url)
    # Raw string: the old non-raw literal relied on '\w' surviving unescaped,
    # which raises DeprecationWarning on modern Python. Pattern text unchanged.
    pattern = re.compile(r'title="(.*?)".* data="(\w*)\|.*?"')
    for title, hash_val in pattern.findall(html):
        kugou_download_by_hash(title, hash_val, output_dir, merge, info_only)


site_info = "kugou.com"
download = kugou_download
download_playlist = kugou_download_playlist
'Accept-Language': 'en-US,en;q=0.8', 14 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' 15 | } 16 | 17 | def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs): 18 | '''Source: Android mobile''' 19 | page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4' 20 | 21 | mobile_page = get_content(page_url, headers=fake_headers_mobile) 22 | url = match1(mobile_page, r'