├── tests ├── __init__.py ├── test_common.py ├── test_util.py └── test.py ├── src └── you_get │ ├── util │ ├── __init__.py │ ├── term.py │ ├── strings.py │ ├── os.py │ ├── fs.py │ ├── git.py │ └── log.py │ ├── cli_wrapper │ ├── __init__.py │ ├── player │ │ ├── wmp.py │ │ ├── mplayer.py │ │ ├── dragonplayer.py │ │ ├── gnome_mplayer.py │ │ ├── vlc.py │ │ ├── __init__.py │ │ └── __main__.py │ ├── openssl │ │ └── __init__.py │ ├── transcoder │ │ ├── libav.py │ │ ├── __init__.py │ │ ├── ffmpeg.py │ │ └── mencoder.py │ └── downloader │ │ └── __init__.py │ ├── version.py │ ├── processor │ ├── __init__.py │ ├── join_ts.py │ └── rtmpdump.py │ ├── __init__.py │ ├── extractors │ ├── khan.py │ ├── alive.py │ ├── archive.py │ ├── cbs.py │ ├── freesound.py │ ├── bandcamp.py │ ├── magisto.py │ ├── giphy.py │ ├── heavymusic.py │ ├── ted.py │ ├── metacafe.py │ ├── mixcloud.py │ ├── iqilu.py │ ├── theplatform.py │ ├── huomaotv.py │ ├── facebook.py │ ├── interest.py │ ├── ehow.py │ ├── dailymotion.py │ ├── yizhibo.py │ ├── suntv.py │ ├── veoh.py │ ├── qq_egame.py │ ├── kuwo.py │ ├── w56.py │ ├── naver.py │ ├── joy.py │ ├── qingting.py │ ├── mtv81.py │ ├── kuaishou.py │ ├── pinterest.py │ ├── xinpianchang.py │ ├── baomihua.py │ ├── ifeng.py │ ├── nicovideo.py │ ├── kakao.py │ ├── tiktok.py │ ├── infoq.py │ ├── zhibo.py │ ├── nanagogo.py │ ├── miomio.py │ ├── douyin.py │ ├── __init__.py │ ├── iwara.py │ ├── zhanqi.py │ ├── douban.py │ ├── fc2video.py │ ├── vk.py │ ├── bigthink.py │ ├── soundcloud.py │ ├── instagram.py │ ├── lizhi.py │ ├── tucao.py │ ├── longzhu.py │ ├── cntv.py │ ├── toutiao.py │ ├── sohu.py │ ├── douyutv.py │ ├── ku6.py │ ├── imgur.py │ ├── lrts.py │ ├── qie_video.py │ ├── twitter.py │ ├── bokecc.py │ ├── zhihu.py │ ├── showroom.py │ ├── pixnet.py │ ├── kugou.py │ ├── qie.py │ ├── ckplayer.py │ ├── coub.py │ ├── ximalaya.py │ ├── tudou.py │ ├── yixia.py │ ├── sina.py │ └── wanmen.py │ ├── json_output.py │ └── __main__.py ├── requirements.txt ├── SECURITY.md ├── setup.cfg ├── you-get.plugin.zsh ├── MANIFEST.in ├── you-get ├── Makefile ├── contrib └── completion │ ├── you-get-completion.bash │ ├── _you-get │ └── you-get.fish ├── CONTRIBUTING.md ├── LICENSE.txt ├── .gitignore ├── you-get.json ├── .github └── workflows │ └── python-package.yml ├── setup.py └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/wmp.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/openssl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/src/you_get/cli_wrapper/transcoder/libav.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/dragonplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/gnome_mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/ffmpeg.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/mencoder.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # runtime dependencies 2 | dukpy 3 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/vlc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .mplayer import * 4 | -------------------------------------------------------------------------------- /src/you_get/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | script_name = 'you-get' 4 | __version__ = '0.4.1743' 5 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | Please report security issues to <mort.yao@gmail.com>.
6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /you-get.plugin.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | alias you-get="noglob python3 $(dirname $0)/you-get" 3 | alias you-vlc="noglob python3 $(dirname $0)/you-get --player vlc" 4 | -------------------------------------------------------------------------------- /src/you_get/processor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .join_flv import concat_flv 4 | from .join_mp4 import concat_mp4 5 | from .ffmpeg import * 6 | from .rtmpdump import * 7 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include *.txt 3 | include Makefile 4 | include CONTRIBUTING.md 5 | include README.md 6 | include you-get 7 | include you-get.json 8 | include you-get.plugin.zsh 9 | recursive-include contrib * 10 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' WIP 4 | def main(): 5 | script_main('you-get', any_download, any_download_playlist) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | ''' 10 | -------------------------------------------------------------------------------- /src/you_get/util/term.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def get_terminal_size(): 4 | """Get (rows, columns) of the current terminal.""" 5 | try: 6 | import fcntl, termios, struct # fcntl module only available on Unix 7 | return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) 8 | except: 9 | return (40, 80) 10 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.common import * 6 | 7 | class TestCommon(unittest.TestCase): 8 | 9 | def test_match1(self): 10 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') 11 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) 12 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.util.fs import * 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_legitimize(self): 9 | self.assertEqual(legitimize("1*2", os="linux"), "1*2") 10 | self.assertEqual(legitimize("1*2", os="mac"), "1*2") 11 | self.assertEqual(legitimize("1*2", os="windows"), "1-2") 12 | self.assertEqual(legitimize("1*2", os="wsl"), "1-2") 13 | --------------------------------------------------------------------------------
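common.py itself is not included in this snapshot, so the contract that test_match1 exercises is worth spelling out: with a single pattern, match1 returns the first capture group of the match (or None); with several patterns, it returns the list of first capture groups. A minimal sketch of that behavior — a hypothetical reimplementation for illustration, not the shipped code:

import re

def match1(text, *patterns):
    """Hypothetical sketch of you_get.common.match1; behavior inferred
    from test_match1 above, not copied from the shipped module."""
    if len(patterns) == 1:
        # One pattern: return its first capture group, or None on no match.
        match = re.search(patterns[0], text)
        return match.group(1) if match else None
    # Several patterns: collect the first capture group of each match.
    ret = []
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            ret.append(match.group(1))
    return ret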
/src/you_get/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 3 | 4 | import sys 5 | 6 | if sys.version_info[0] == 3: 7 | #from .extractor import Extractor, VideoExtractor 8 | #from .util import log 9 | 10 | from .__main__ import * 11 | 12 | #from .common import * 13 | #from .version import * 14 | #from .cli_wrapper import * 15 | #from .extractor import * 16 | else: 17 | # Don't import anything. 18 | pass 19 | -------------------------------------------------------------------------------- /you-get: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys 3 | 4 | _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) 5 | _filepath = os.path.dirname(sys.argv[0]) 6 | sys.path.insert(1, os.path.join(_filepath, _srcdir)) 7 | 8 | if sys.version_info[0] == 3: 9 | import you_get 10 | if __name__ == '__main__': 11 | you_get.main(repo_path=_filepath) 12 | else: # Python 2 13 | from you_get.util import log 14 | log.e("[fatal] Python 3 is required!") 15 | log.wtf("try to run this script using 'python3 you-get'.") 16 | -------------------------------------------------------------------------------- /src/you_get/extractors/khan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['khan_download'] 4 | 5 | from ..common import * 6 | from .youtube import YouTube 7 | 8 | def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_content(url) 10 | youtube_url = re.search('>> import you_get" % you_get.version.__version__)') 7 | 8 | test: 9 | (cd src; python -m unittest discover -s ../tests) 10 | 11 | clean: 12 | zenity --question 13 | rm -fr build/ dist/ src/*.egg-info/ 14 | find . | grep __pycache__ | xargs rm -fr 15 | find . | grep .pyc | xargs rm -f 16 | 17 | all: build 18 | 19 | html: 20 | pandoc README.md > README.html 21 | 22 | rst: 23 | pandoc -s -t rst README.md > README.rst 24 | 25 | build: 26 | python -m build 27 | 28 | install: 29 | python -m pip install . 30 | 31 | release: build 32 | @echo 'Upload new version to PyPI using:' 33 | @echo ' twine upload --sign dist/you_get-VERSION*' 34 | -------------------------------------------------------------------------------- /src/you_get/extractors/bandcamp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['bandcamp_download'] 4 | 5 | from ..common import * 6 | 7 | def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url) 9 | trackinfo = json.loads(r1(r'(\[{"(video_poster_url|video_caption)".*}\]),', html)) 10 | for track in trackinfo: 11 | track_num = track['track_num'] 12 | title = '%s. 
%s' % (track_num, track['title']) 13 | file_url = 'http:' + track['file']['mp3-128'] 14 | mime, ext, size = url_info(file_url) 15 | 16 | print_info(site_info, title, mime, size) 17 | if not info_only: 18 | download_urls([file_url], title, ext, size, output_dir, merge=merge) 19 | 20 | site_info = "Bandcamp.com" 21 | download = bandcamp_download 22 | download_playlist = bandcamp_download 23 | -------------------------------------------------------------------------------- /src/you_get/util/strings.py: -------------------------------------------------------------------------------- 1 | try: 2 | # py 3.4 3 | from html import unescape as unescape_html 4 | except ImportError: 5 | import re 6 | from html.entities import entitydefs 7 | 8 | def unescape_html(string): 9 | '''HTML entity decode''' 10 | string = re.sub(r'&#[^;]+;', _sharp2uni, string) 11 | string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string) 12 | return string 13 | 14 | def _sharp2uni(m): 15 | '''&#...; ==> unicode''' 16 | s = m.group(0)[2:].rstrip(';;') 17 | if s.startswith('x'): 18 | return chr(int('0'+s, 16)) 19 | else: 20 | return chr(int(s)) 21 | 22 | from .fs import legitimize 23 | 24 | def get_filename(htmlstring): 25 | return legitimize(unescape_html(htmlstring)) 26 | 27 | def parameterize(string): 28 | return "'%s'" % string.replace("'", r"'\''") 29 | -------------------------------------------------------------------------------- /src/you_get/extractors/magisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['magisto_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) 12 | api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) 13 | content = get_html(api_url) 14 | data = json.loads(content) 15 | title1 = data['title'] 16 | title2 = data['creator'] 17 | title = "%s - %s" % (title1, title2) 18 | url = data['video_direct_url'] 19 | type, ext, size = url_info(url) 20 | 21 | print_info(site_info, title, type, size) 22 | if not info_only: 23 | download_urls([url], title, ext, size, output_dir, merge=merge) 24 | 25 | site_info = "Magisto.com" 26 | download = magisto_download 27 | download_playlist = playlist_not_supported('magisto') 28 | -------------------------------------------------------------------------------- /src/you_get/util/os.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from platform import system 4 | 5 | def detect_os(): 6 | """Detect operating system. 
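    Returns one of 'cygwin', 'mac', 'linux', 'wsl', 'windows', 'bsd', or 'unknown'.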
7 | """ 8 | 9 | # Inspired by: 10 | # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py 11 | 12 | syst = system().lower() 13 | os = 'unknown' 14 | 15 | if 'cygwin' in syst: 16 | os = 'cygwin' 17 | elif 'darwin' in syst: 18 | os = 'mac' 19 | elif 'linux' in syst: 20 | os = 'linux' 21 | # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 22 | try: 23 | with open('/proc/version', 'r') as f: 24 | if 'microsoft' in f.read().lower(): 25 | os = 'wsl' 26 | except: pass 27 | elif 'windows' in syst: 28 | os = 'windows' 29 | elif 'bsd' in syst: 30 | os = 'bsd' 31 | 32 | return os 33 | -------------------------------------------------------------------------------- /src/you_get/extractors/giphy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['giphy_download'] 4 | 5 | from ..common import * 6 | 7 | def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url) 9 | 10 | url = list(set([ 11 | unicodize(str.replace(i, '\\/', '/')) 12 | for i in re.findall(r'', html) 13 | ])) 14 | 15 | title = r1(r'', html) 16 | 17 | if title is None: 18 | title = url[0] 19 | 20 | type, ext, size = url_info(url[0], True) 21 | size = urls_size(url) 22 | 23 | type = "video/mp4" 24 | ext = "mp4" 25 | 26 | print_info(site_info, title, type, size) 27 | if not info_only: 28 | download_urls(url, title, ext, size, output_dir, merge=False) 29 | 30 | site_info = "Giphy.com" 31 | download = giphy_download 32 | download_playlist = playlist_not_supported('giphy') 33 | -------------------------------------------------------------------------------- /src/you_get/extractors/heavymusic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['heavymusic_download'] 4 | 5 | from ..common import * 6 | 7 | def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url) 9 | tracks = re.findall(r'href="(online2\.php[^"]+)"', html) 10 | for track in tracks: 11 | band = r1(r'band=([^&]*)', track) 12 | album = r1(r'album=([^&]*)', track) 13 | title = r1(r'track=([^&]*)', track) 14 | file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) 15 | _, _, size = url_info(file_url) 16 | 17 | print_info(site_info, title, 'mp3', size) 18 | if not info_only: 19 | download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) 20 | 21 | site_info = "heavy-music.ru" 22 | download = heavymusic_download 23 | download_playlist = heavymusic_download 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/ted.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ted_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' 11 | metadata = json.loads('{' + match1(html, patt) + '}') 12 | title = metadata['talks'][0]['title'] 13 | nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] 14 | for quality in ['high', 'medium', 'low']: 15 | if quality in nativeDownloads: 16 | url = nativeDownloads[quality] 17 | type, ext, size = url_info(url) 18 | 
print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge=merge) 21 | break 22 | 23 | site_info = "TED.com" 24 | download = ted_download 25 | download_playlist = playlist_not_supported('ted') 26 | -------------------------------------------------------------------------------- /src/you_get/extractors/metacafe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['metacafe_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from urllib.parse import unquote 8 | 9 | def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.metacafe.com/watch/\w+', url): 11 | html =get_content(url) 12 | title = r1(r'([^<]{1,9999})') 27 | 28 | print_info(site_info, title, 'm3u8', float('inf')) 29 | 30 | if not info_only: 31 | download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) 32 | 33 | 34 | site_info = 'huomao.com' 35 | download = huomaotv_download 36 | download_playlist = playlist_not_supported('huomao') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/facebook.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['facebook_download'] 4 | 5 | from ..common import * 6 | 7 | def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | url = re.sub(r'//.*?facebook.com','//facebook.com',url) 9 | html = get_html(url) 10 | 11 | title = r1(r'(.+)', html) 12 | 13 | if title is None: 14 | title = url 15 | 16 | sd_urls = list(set([ 17 | unicodize(str.replace(i, '\\/', '/')) 18 | for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html) 19 | ])) 20 | hd_urls = list(set([ 21 | unicodize(str.replace(i, '\\/', '/')) 22 | for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html) 23 | ])) 24 | urls = hd_urls if hd_urls else sd_urls 25 | 26 | type, ext, size = url_info(urls[0], True) 27 | size = urls_size(urls) 28 | 29 | print_info(site_info, title, type, size) 30 | if not info_only: 31 | download_urls(urls, title, ext, size, output_dir, merge=False) 32 | 33 | site_info = "Facebook.com" 34 | download = facebook_download 35 | download_playlist = playlist_not_supported('facebook') 36 | -------------------------------------------------------------------------------- /src/you_get/extractors/interest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from json import loads 5 | 6 | def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 7 | #http://ch.interest.me/zhtv/VOD/View/114789 8 | #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 9 | html = get_content(url) 10 | #get title 11 | title = match1(html, r'', html) 13 | vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) 14 | assert vid 15 | 16 | xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) 17 | 18 | from xml.dom.minidom import parseString 19 | doc = parseString(xml) 20 | tab = doc.getElementsByTagName('related')[0].firstChild 21 | 22 | for video in tab.childNodes: 23 | if re.search(contentid, video.attributes['link'].value): 24 | url = video.attributes['flv'].value 25 | break 26 | 27 | title = video.attributes['title'].value 28 | assert title 29 | 30 | type, ext, size = url_info(url) 31 | 
print_info(site_info, title, type, size) 32 | 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir, merge = merge) 35 | 36 | site_info = "ehow.com" 37 | download = ehow_download 38 | download_playlist = playlist_not_supported('ehow') 39 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2012-2024 Mort Yao and other contributors 4 | (https://github.com/soimort/you-get/graphs/contributors) 5 | Copyright (c) 2012 Boyu Guo 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Misc 62 | _* 63 | *_ 64 | *.3gp 65 | *.asf 66 | *.download 67 | *.f4v 68 | *.flv 69 | *.gif 70 | *.html 71 | *.jpg 72 | *.lrc 73 | *.mkv 74 | *.mp3 75 | *.mp4 76 | *.mpg 77 | *.png 78 | *.srt 79 | *.ts 80 | *.webm 81 | *.xml 82 | *.json 83 | /.env 84 | /.idea 85 | *.m4a 86 | *.DS_Store 87 | *.txt 88 | *.sw[a-p] 89 | 90 | *.zip 91 | 92 | .emacs* 93 | .vscode 94 | -------------------------------------------------------------------------------- /src/you_get/util/fs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .os import detect_os 4 | 5 | def legitimize(text, os=detect_os()): 6 | """Converts a string to a valid filename. 7 | """ 8 | 9 | # POSIX systems 10 | text = text.translate({ 11 | 0: None, 12 | ord('/'): '-', 13 | ord('|'): '-', 14 | }) 15 | 16 | # FIXME: do some filesystem detection 17 | if os == 'windows' or os == 'cygwin' or os == 'wsl': 18 | # Windows (non-POSIX namespace) 19 | text = text.translate({ 20 | # Reserved in Windows VFAT and NTFS 21 | ord(':'): '-', 22 | ord('*'): '-', 23 | ord('?'): '-', 24 | ord('\\'): '-', 25 | ord('\"'): '\'', 26 | # Reserved in Windows VFAT 27 | ord('+'): '-', 28 | ord('<'): '-', 29 | ord('>'): '-', 30 | ord('['): '(', 31 | ord(']'): ')', 32 | ord('\t'): ' ', 33 | }) 34 | else: 35 | # *nix 36 | if os == 'mac': 37 | # Mac OS HFS+ 38 | text = text.translate({ 39 | ord(':'): '-', 40 | }) 41 | 42 | # Remove leading . 
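    # (a leading dot would make the file hidden on POSIX filesystems)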
43 | if text.startswith("."): 44 | text = text[1:] 45 | 46 | text = text[:80] # Trim to 80 Unicode characters long 47 | return text 48 | -------------------------------------------------------------------------------- /you-get.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "you-get", 3 | "author": "Mort Yao", 4 | "author_email": "mort.yao@gmail.com", 5 | "url": "https://you-get.org/", 6 | "license": "MIT", 7 | 8 | "description": "Dumb downloader that scrapes the web", 9 | "keywords": "video download youtube youku niconico", 10 | 11 | "classifiers": [ 12 | "Development Status :: 4 - Beta", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: End Users/Desktop", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.7", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Topic :: Internet", 28 | "Topic :: Internet :: WWW/HTTP", 29 | "Topic :: Multimedia", 30 | "Topic :: Multimedia :: Graphics", 31 | "Topic :: Multimedia :: Sound/Audio", 32 | "Topic :: Multimedia :: Video", 33 | "Topic :: Utilities" 34 | ], 35 | 36 | "console_scripts": [ 37 | "you-get = you_get.__main__:main" 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/dailymotion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['dailymotion_download'] 4 | 5 | from ..common import * 6 | import urllib.parse 7 | 8 | def rebuilt_url(url): 9 | path = urllib.parse.urlparse(url).path 10 | aid = path.split('/')[-1].split('_')[0] 11 | return 'http://www.dailymotion.com/embed/video/{}?autoplay=1'.format(aid) 12 | 13 | def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 14 | """Downloads Dailymotion videos by URL. 
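    Rebuilds the URL into its embed form, reads the "qualities" JSON from the page, and picks the best available stream (1080 down to auto).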
15 | """ 16 | 17 | html = get_content(rebuilt_url(url)) 18 | info = json.loads(match1(html, r'qualities":({.+?}),"')) 19 | title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ 20 | match1(html, r'"title"\s*:\s*"([^"]+)"') 21 | title = unicodize(title) 22 | 23 | for quality in ['1080','720','480','380','240','144','auto']: 24 | try: 25 | real_url = info[quality][1]["url"] 26 | if real_url: 27 | break 28 | except KeyError: 29 | pass 30 | 31 | mime, ext, size = url_info(real_url) 32 | 33 | print_info(site_info, title, mime, size) 34 | if not info_only: 35 | download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge) 36 | 37 | site_info = "Dailymotion.com" 38 | download = dailymotion_download 39 | download_playlist = playlist_not_supported('dailymotion') 40 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | 3 | name: develop 4 | 5 | on: 6 | push: 7 | branches: [ develop ] 8 | pull_request: 9 | branches: [ develop ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9, '3.10', '3.11', '3.12', '3.13', pypy-3.8, pypy-3.9, pypy-3.10] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip setuptools 29 | pip install flake8 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --ignore=F824 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with unittest 38 | run: | 39 | make test 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/yizhibo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['yizhibo_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | video_id = url[url.rfind('/')+1:].split(".")[0] 10 | json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) 11 | content = get_content(json_request_url) 12 | error = json.loads(content)['result'] 13 | if (error != 1): 14 | raise ValueError("Error : {}".format(error)) 15 | 16 | data = json.loads(content) 17 | title = data.get('data')['live_title'] 18 | if (title == ''): 19 | title = data.get('data')['nickname'] 20 | m3u8_url = data.get('data')['play_url'] 21 | m3u8 = get_content(m3u8_url) 22 | base_url = "/".join(data.get('data')['play_url'].split("/")[:7])+"/" 23 | part_url = re.findall(r'([0-9]+\.ts)', m3u8) 24 | real_url = [] 25 | for i in part_url: 26 | url = base_url + i 27 | real_url.append(url) 28 | print_info(site_info, title, 'ts', float('inf')) 29 | if not info_only: 30 | if player: 31 | launch_player(player, [m3u8_url]) 32 | download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge) 33 | 34 | site_info = "yizhibo.com" 35 | download = yizhibo_download 36 | download_playlist = playlist_not_supported('yizhibo') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/suntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['suntv_download'] 4 | 5 | from ..common import * 6 | import urllib 7 | import re 8 | 9 | def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.isuntv.com/\w+', url): 11 | API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" 12 | 13 | itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') 14 | values = {"itemid" : itemid, "vodid": ""} 15 | 16 | data = str(values).replace("'", '"') 17 | data = data.encode('utf-8') 18 | req = urllib.request.Request(API_URL, data) 19 | req.add_header('AjaxPro-Method', 'ToPlay') #important! 20 | resp = urllib.request.urlopen(req) 21 | respData = resp.read() 22 | respData = respData.decode('ascii').strip('"') #Ahhhhhhh! 
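        # (the AjaxPro endpoint apparently answers with a double-quoted path string, hence the strip('"') above; the site root is prepended below to form the full video URL)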
23 | 24 | video_url = 'http://www.isuntv.com' + str(respData) 25 | 26 | html = get_content(url, decoded=False) 27 | html = html.decode('gbk') 28 | title = match1(html, '<title>([^<]+)</title>').strip() #get rid of \r\n s 29 | 30 | size = 0 31 | type, ext, size = url_info(video_url) 32 | 33 | print_info(site_info, title, type, size) 34 | if not info_only: 35 | download_urls([video_url], title, 'mp4', size, output_dir, merge=merge) 36 | 37 | site_info = "SunTV" 38 | download = suntv_download 39 | download_playlist = playlist_not_supported('suntv') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/veoh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['veoh_download'] 4 | 5 | from ..common import * 6 | 7 | def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 8 | '''Get item_id''' 9 | if re.match(r'http://www.veoh.com/watch/\w+', url): 10 | item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') 11 | elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): 12 | item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') 13 | else: 14 | raise NotImplementedError('Cannot find item ID') 15 | veoh_download_by_id(item_id, output_dir = output_dir, merge = merge, info_only = info_only, **kwargs) 16 | 17 | #---------------------------------------------------------------------- 18 | def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): 19 | """Source: Android mobile""" 20 | webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) 21 | 22 | #grab download URL 23 | a = get_content(webpage_url, decoded=True) 24 | url = match1(a, r'<source src="(.*?)\"\W') 25 | 26 | #grab title 27 | title = match1(a, r'<meta property="og:title" content="([^"]*)"') 28 | 29 | type_, ext, size = url_info(url) 30 | print_info(site_info, title, type_, size) 31 | if not info_only: 32 | download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge) 33 | 34 | 35 | site_info = "Veoh" 36 | download = veoh_download 37 | download_playlist = playlist_not_supported('veoh') 38 | -------------------------------------------------------------------------------- /src/you_get/extractors/qq_egame.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from ..common import * 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | from ..util.strings import unescape_html 8 | 9 | __all__ = ['qq_egame_download'] 10 | 11 | 12 | def qq_egame_download(url, 13 | output_dir='.', 14 | merge=True, 15 | info_only=False, 16 | **kwargs): 17 | uid = re.search(r'\d\d\d+', url) 18 | an_url = "https://m.egame.qq.com/live?anchorid={}&".format(uid.group(0)) 19 | page = get_content(an_url) 20 | server_data = re.search(r'window\.serverData\s*=\s*({.+?});', page) 21 | if server_data is None: 22 | log.wtf('Can not find window.server_data') 23 | json_data = json.loads(server_data.group(1)) 24 | if json_data['anchorInfo']['data']['isLive'] == 0: 25 | log.wtf('Offline...') 26 | live_info = json_data['liveInfo']['data'] 27 | title = '{}_{}'.format(live_info['profileInfo']['nickName'], 28 | live_info['videoInfo']['title']) 29 | real_url = live_info['videoInfo']['streamInfos'][0]['playUrl'] 30 | 31 | print_info(site_info, title, 'flv', float('inf')) 32 | if not info_only: 33 | download_url_ffmpeg( 34 | real_url, 35 | title, 36 | 
'flv', 37 | params={}, 38 | output_dir=output_dir, 39 | merge=merge) 40 | 41 | 42 | site_info = "egame.qq.com" 43 | download = qq_egame_download 44 | download_playlist = playlist_not_supported('qq_egame') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuwo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['kuwo_download'] 4 | 5 | from ..common import * 6 | import re 7 | 8 | def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False): 9 | html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid) 10 | title=match1(html,r"<name>(.*)</name>") 11 | #to get title 12 | #format =aac|mp3 ->to get aac format=mp3 ->to get mp3 13 | url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid) 14 | songtype, ext, size = url_info(url) 15 | print_info(site_info, title, songtype, size) 16 | if not info_only: 17 | download_urls([url], title, ext, size, output_dir) 18 | 19 | def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | html=get_content(url) 21 | matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated 22 | for rid in matched: 23 | kuwo_download_by_rid(rid,output_dir,merge,info_only) 24 | 25 | 26 | 27 | def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if "www.kuwo.cn/yinyue" in url: 29 | rid=match1(url, r'yinyue/(\d+)') 30 | kuwo_download_by_rid(rid,output_dir, merge, info_only) 31 | else: 32 | kuwo_playlist_download(url,output_dir,merge,info_only) 33 | 34 | site_info = "kuwo.cn" 35 | download = kuwo_download 36 | # download_playlist = playlist_not_supported("kugou") 37 | # download_playlist=playlist_not_supported("kuwo") 38 | download_playlist=kuwo_playlist_download 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/w56.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['w56_download', 'w56_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | from .sohu import sohu_download 8 | 9 | import json 10 | 11 | def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 12 | content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id)) 13 | info = content['info'] 14 | title = title or info['Subject'] 15 | assert title 16 | hd = info['hd'] 17 | assert hd in (0, 1, 2) 18 | hd_types = [['normal', 'qvga'], ['clear', 'vga'], ['super', 'wvga']][hd] 19 | files = [x for x in info['rfiles'] if x['type'] in hd_types] 20 | assert len(files) == 1 21 | size = int(files[0]['filesize']) 22 | url = files[0]['url'] + '&prod=56' 23 | ext = 'mp4' 24 | 25 | print_info(site_info, title, ext, size) 26 | if not info_only: 27 | download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) 28 | 29 | def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 30 | content = get_content(url) 31 | sohu_url = r1(r"url:\s*'([^']*)'", content) 32 | if sohu_url: 33 | sohu_download(sohu_url, output_dir, merge=merge, info_only=info_only, **kwargs) 34 | return 35 | 36 | id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) or \ 37 | r1(r'http://www.56.com/.*vid-(\w+).html', url) 38 | w56_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) 39 | 40 | 
site_info = "56.com" 41 | download = w56_download 42 | download_playlist = playlist_not_supported('56') 43 | -------------------------------------------------------------------------------- /contrib/completion/_you-get: -------------------------------------------------------------------------------- 1 | #compdef you-get 2 | 3 | # Zsh completion definition for soimort/you-get. 4 | 5 | setopt localoptions noshwordsplit noksharrays 6 | local -a args 7 | 8 | args=( 9 | '(- : *)'{-V,--version}'[print version and exit]' 10 | '(- : *)'{-h,--help}'[print help and exit]' 11 | '(-i --info)'{-i,--info}'[print extracted information]' 12 | '(-u --url)'{-u,--url}'[print extracted information with URLs]' 13 | '(--json)--json[print extracted URLs in JSON format]' 14 | '(-n --no-merge)'{-n,--no-merge}'[do not merge video parts]' 15 | '(--no-caption)--no-caption[do not download captions]' 16 | '(-f --force)'{-f,--force}'[force overwrite existing files]' 17 | '(-F --format)'{-F,--format}'[set video format to the specified stream id]:stream id' 18 | '(-O --output-filename)'{-O,--output-filename}'[set output filename]:filename:_files' 19 | '(-o --output-dir)'{-o,--output-dir}'[set output directory]:directory:_files -/' 20 | '(-p --player)'{-p,--player}'[stream extracted URL to the specified player]:player and options' 21 | '(-c --cookies)'{-c,--cookies}'[load cookies.txt or cookies.sqlite]:cookies file:_files' 22 | '(-x --http-proxy)'{-x,--http-proxy}'[use the specified HTTP proxy for downloading]:host\:port:' 23 | '(-y --extractor-proxy)'{-y,--extractor-proxy}'[use the specified HTTP proxy for extraction only]:host\:port' 24 | '(--no-proxy)--no-proxy[do not use a proxy]' 25 | '(-t --timeout)'{-t,--timeout}'[set socket timeout]:seconds' 26 | '(-d --debug)'{-d,--debug}'[show traceback and other debug info]' 27 | '*: :_guard "^-*" url' 28 | ) 29 | _arguments -S -s $args 30 | -------------------------------------------------------------------------------- /src/you_get/extractors/naver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 10 | from .universal import * 11 | 12 | __all__ = ['naver_download_by_url'] 13 | 14 | 15 | def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): 16 | ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) 20 | key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) 21 | meta_str = get_content(ep.format(vid, key)) 22 | meta_json = json.loads(meta_str) 23 | if 'errorCode' in meta_json: 24 | log.wtf(meta_json['errorCode']) 25 | title = meta_json['meta']['subject'] 26 | videos = meta_json['videos']['list'] 27 | video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) 28 | video_url = video_list[-1]['source'] 29 | # size = video_list[-1]['size'] 30 | # result wrong size 31 | size = url_size(video_url) 32 | print_info(site_info, title, 'mp4', size) 33 | if not info_only: 34 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 35 | except: 36 | universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) 37 | 38 | site_info = "naver.com" 39 | download = naver_download_by_url 40 | download_playlist = 
playlist_not_supported('naver') 41 | -------------------------------------------------------------------------------- /src/you_get/util/git.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | from ..version import __version__ 6 | 7 | def get_head(repo_path): 8 | """Get (branch, commit) from HEAD of a git repo.""" 9 | try: 10 | ref = open(os.path.join(repo_path, '.git', 'HEAD'), 'r').read().strip()[5:].split('/') 11 | branch = ref[-1] 12 | commit = open(os.path.join(repo_path, '.git', *ref), 'r').read().strip()[:7] 13 | return branch, commit 14 | except: 15 | return None 16 | 17 | def get_version(repo_path): 18 | try: 19 | version = __version__.split('.') 20 | major, minor, cn = [int(i) for i in version] 21 | p = subprocess.Popen(['git', 22 | '--git-dir', os.path.join(repo_path, '.git'), 23 | '--work-tree', repo_path, 24 | 'rev-list', 'HEAD', '--count'], 25 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 26 | raw, err = p.communicate() 27 | c_head = int(raw.decode('ascii')) 28 | q = subprocess.Popen(['git', 29 | '--git-dir', os.path.join(repo_path, '.git'), 30 | '--work-tree', repo_path, 31 | 'rev-list', 'master', '--count'], 32 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 33 | raw, err = q.communicate() 34 | c_master = int(raw.decode('ascii')) 35 | cc = c_head - c_master 36 | assert cc 37 | return '%s.%s.%s' % (major, minor, cn + cc) 38 | except: 39 | return __version__ 40 | -------------------------------------------------------------------------------- /contrib/completion/you-get.fish: -------------------------------------------------------------------------------- 1 | # Fish completion definition for you-get. 2 | 3 | complete -c you-get -s V -l version -d 'print version and exit' 4 | complete -c you-get -s h -l help -d 'print help and exit' 5 | complete -c you-get -s i -l info -d 'print extracted information' 6 | complete -c you-get -s u -l url -d 'print extracted information' 7 | complete -c you-get -l json -d 'print extracted URLs in JSON format' 8 | complete -c you-get -s n -l no-merge -d 'do not merge video parts' 9 | complete -c you-get -l no-caption -d 'do not download captions' 10 | complete -c you-get -s f -l force -d 'force overwrite existing files' 11 | complete -c you-get -s F -l format -x -d 'set video format to the specified stream id' 12 | complete -c you-get -s O -l output-filename -d 'set output filename' \ 13 | -x -a '(__fish_complete_path (commandline -ct) "output filename")' 14 | complete -c you-get -s o -l output-dir -d 'set output directory' \ 15 | -x -a '(__fish_complete_directories (commandline -ct) "output directory")' 16 | complete -c you-get -s p -l player -x -d 'stream extracted URL to the specified player' 17 | complete -c you-get -s c -l cookies -d 'load cookies.txt or cookies.sqlite' \ 18 | -x -a '(__fish_complete_path (commandline -ct) "cookies.txt or cookies.sqlite")' 19 | complete -c you-get -s x -l http-proxy -x -d 'use the specified HTTP proxy for downloading' 20 | complete -c you-get -s y -l extractor-proxy -x -d 'use the specified HTTP proxy for extraction only' 21 | complete -c you-get -l no-proxy -d 'do not use a proxy' 22 | complete -c you-get -s t -l timeout -x -d 'set socket timeout' 23 | complete -c you-get -s d -l debug -d 'show traceback and other debug info' 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/joy.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['joy_download'] 4 | 5 | from ..common import * 6 | 7 | def video_info(channel_id, program_id, volumn_id): 8 | url = 'http://msx.app.joy.cn/service.php' 9 | if program_id: 10 | url += '?action=vodmsxv6' 11 | url += '&channelid=%s' % channel_id 12 | url += '&programid=%s' % program_id 13 | url += '&volumnid=%s' % volumn_id 14 | else: 15 | url += '?action=msxv6' 16 | url += '&videoid=%s' % volumn_id 17 | 18 | xml = get_html(url) 19 | 20 | name = r1(r'<Title>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</Title>', xml) 21 | urls = re.findall(r'<url\s[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</url>', xml) 22 | hostpath = r1(r'<hostpath\s[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</hostpath>', xml) 23 | 24 | return name, urls, hostpath 25 | 26 | def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 27 | channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) 28 | program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) 29 | volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) 30 | 31 | title, urls, hostpath = video_info(channel_id, program_id, volumn_id) 32 | urls = [hostpath + url for url in urls] 33 | 34 | size = 0 35 | for url in urls: 36 | _, ext, temp = url_info(url) 37 | size += temp 38 | 39 | print_info(site_info, title, ext, size) 40 | if not info_only: 41 | download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) 42 | 43 | site_info = "Joy.cn" 44 | download = joy_download 45 | download_playlist = playlist_not_supported('joy') 46 | -------------------------------------------------------------------------------- /src/you_get/extractors/qingting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from ..common import get_content, playlist_not_supported, url_size 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | 8 | __all__ = ['qingting_download_by_url'] 9 | 10 | 11 | class Qingting(VideoExtractor): 12 | # every resource is described by its channel id and program id 13 | # so vid is tuple (channel_id, program_id) 14 | 15 | name = 'Qingting' 16 | stream_types = [ 17 | {'id': '_default'} 18 | ] 19 | 20 | ep = 'http://i.qingting.fm/wapi/channels/{}/programs/{}' 21 | file_host = 'http://od.qingting.fm/{}' 22 | mobile_pt = r'channels\/(\d+)\/programs/(\d+)' 23 | 24 | def prepare(self, **kwargs): 25 | if self.vid is None: 26 | hit = re.search(self.__class__.mobile_pt, self.url) 27 | self.vid = (hit.group(1), hit.group(2)) 28 | 29 | ep_url = self.__class__.ep.format(self.vid[0], self.vid[1]) 30 | meta = json.loads(get_content(ep_url)) 31 | 32 | if meta['code'] != 0: 33 | log.wtf(meta['message']['errormsg']) 34 | 35 | file_path = self.__class__.file_host.format(meta['data']['file_path']) 36 | self.title = meta['data']['name'] 37 | duration = str(meta['data']['duration']) + 's' 38 | 39 | self.streams['_default'] = {'src': [file_path], 'video_profile': duration, 'container': 'm4a'} 40 | 41 | def extract(self, **kwargs): 42 | self.streams['_default']['size'] = url_size(self.streams['_default']['src'][0]) 43 | 44 | 45 | def qingting_download_by_url(url, **kwargs): 46 | Qingting().download_by_url(url, **kwargs) 47 | 48 | site_info = 'Qingting' 49 | download = qingting_download_by_url 50 | download_playlist = playlist_not_supported('Qingting') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/mtv81.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mtv81_download'] 4 | 5 | from ..common import * 6 | 7 | from xml.dom.minidom import parseString 8 | 9 | from html.parser import HTMLParser 10 | 11 | 12 | def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | html = get_content(url) 14 | title = HTMLParser().unescape( 15 | "|".join(match1(html, r"<title>(.*?)</title>").split("|")[:-2])) 16 | 17 | # mgid%3Auma%3Avideo%3Amtv81.com%3A897974 18 | vid = match1(html, r'getTheVideo\("(.*?)"') 19 | xml = parseString( 20 | get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid))) 21 | 22 | url = sorted( 23 | map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")), 24 | key=lambda x: int(match1(x, r'_(\d+?)_')))[-1] 25 | 26 | mediatype, ext, size = 'mp4', 'mp4', 0 27 | print_info(site_info, title, mediatype, size) 28 | # 29 | # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf 30 | # 31 | # because rtmpdump is unstable, may try several times 32 | # 33 | if not info_only: 34 | # import pdb 35 | # pdb.set_trace() 36 | download_rtmp_url(url=url, title=title, ext=ext, params={ 37 | "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir) 38 | 39 | 40 | site_info = "mtv81.com" 41 | download = mtv81_download 42 | download_playlist = playlist_not_supported('mtv81') 43 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuaishou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import re 6 | 7 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 8 | 9 | __all__ = ['kuaishou_download_by_url'] 10 | 11 | 12 | def kuaishou_download_by_url(url, info_only=False, **kwargs): 13 | page = get_content(url) 14 | # size = video_list[-1]['size'] 15 | # result wrong size 16 | try: 17 | search_result=re.search(r"\"playUrls\":\[(\{\"quality\"\:\"\w+\",\"url\":\".*?\"\})+\]", page) 18 | all_video_info_str = search_result.group(1) 19 | all_video_infos=re.findall(r"\{\"quality\"\:\"(\w+)\",\"url\":\"(.*?)\"\}", all_video_info_str) 20 | # pick the one with the best quality 21 | video_url = all_video_infos[0][1].encode("utf-8").decode('unicode-escape') 22 | title = re.search(r"<title>(.*?)</title>", page).group(1) 23 | size = url_size(video_url) 24 | video_format = "flv"#video_url.split('.')[-1] 25 | print_info(site_info, title, video_format, size) 26 | if not info_only: 27 | download_urls([video_url], title, video_format, size, **kwargs) 28 | except:# extract image 29 | og_image_url = re.search(r"<meta property=\"og:image\" content=\"(.*?)\">", page).group(1) 30 | image_url = og_image_url 31 | title = url.split('/')[-1] 32 | size = url_size(image_url) 33 | image_format = image_url.split('.')[-1] 34 | print_info(site_info, title, image_format, size) 35 | if not info_only: 36 | download_urls([image_url], title, image_format, size, **kwargs) 37 | 38 | site_info = "kuaishou.com" 39 | download = kuaishou_download_by_url 40 | download_playlist = playlist_not_supported('kuaishou') 41 | 
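# Note: a kuaishou page embeds either a "playUrls" JSON blob (video post) or only an og:image meta tag (picture post); the except branch above falls back to downloading the image.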
-------------------------------------------------------------------------------- /src/you_get/extractors/pinterest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | class Pinterest(VideoExtractor): 7 | # site name 8 | name = "Pinterest" 9 | 10 | # ordered list of supported stream types / qualities on this site 11 | # order: high quality -> low quality 12 | stream_types = [ 13 | {'id': 'original'}, # contains an 'id' or 'itag' field at minimum 14 | {'id': 'small'}, 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | # scrape the html 19 | content = get_content(self.url) 20 | 21 | # extract title 22 | self.title = match1(content, 23 | r'(.*)', html) 37 | assert title 38 | id = r1(r'flvid\s*=\s*(\d+)', html) 39 | assert id 40 | baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) 41 | 42 | site_info = "baomihua.com" 43 | download = baomihua_download 44 | download_playlist = playlist_not_supported('baomihua') 45 | -------------------------------------------------------------------------------- /src/you_get/processor/join_ts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import struct 4 | from io import BytesIO 5 | 6 | ################################################## 7 | # main 8 | ################################################## 9 | 10 | def guess_output(inputs): 11 | import os.path 12 | inputs = map(os.path.basename, inputs) 13 | n = min(map(len, inputs)) 14 | for i in reversed(range(1, n)): 15 | if len(set(s[:i] for s in inputs)) == 1: 16 | return inputs[0][:i] + '.ts' 17 | return 'output.ts' 18 | 19 | def concat_ts(ts_parts, output = None): 20 | assert ts_parts, 'no ts files found' 21 | import os.path 22 | if not output: 23 | output = guess_output(ts_parts) 24 | elif os.path.isdir(output): 25 | output = os.path.join(output, guess_output(ts_parts)) 26 | 27 | print('Merging video parts...') 28 | 29 | ts_out_file = open(output, "wb") 30 | for ts_in in ts_parts: 31 | ts_in_file = open(ts_in, "rb") 32 | ts_in_data = ts_in_file.read() 33 | ts_in_file.close() 34 | ts_out_file.write(ts_in_data) 35 | ts_out_file.close() 36 | return output 37 | 38 | def usage(): 39 | print('Usage: [python3] join_ts.py --output TARGET.ts ts...') 40 | 41 | def main(): 42 | import sys, getopt 43 | try: 44 | opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) 45 | except getopt.GetoptError as err: 46 | usage() 47 | sys.exit(1) 48 | output = None 49 | for o, a in opts: 50 | if o in ("-h", "--help"): 51 | usage() 52 | sys.exit() 53 | elif o in ("-o", "--output"): 54 | output = a 55 | else: 56 | usage() 57 | sys.exit(1) 58 | if not args: 59 | usage() 60 | sys.exit(1) 61 | 62 | concat_ts(args, output) 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /src/you_get/extractors/ifeng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ifeng_download', 'ifeng_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 8 | assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id 9 | url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id) 10 | xml = 
get_html(url, 'utf-8') 11 | title = r1(r'Name="([^"]+)"', xml) 12 | title = unescape_html(title) 13 | url = r1(r'VideoPlayUrl="([^"]+)"', xml) 14 | from random import randint 15 | r = randint(10, 19) 16 | url = url.replace('http://wideo.ifeng.com/', 'http://ips.ifeng.com/wideo.ifeng.com/') 17 | type, ext, size = url_info(url) 18 | 19 | print_info(site_info, title, ext, size) 20 | if not info_only: 21 | download_urls([url], title, ext, size, output_dir, merge = merge) 22 | 23 | def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 24 | # old pattern /uuid.shtml 25 | # now it could be #uuid 26 | id = r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', url) 27 | if id: 28 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 29 | 30 | html = get_content(url) 31 | uuid_pattern = r'"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"' 32 | id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', html) 33 | if id is None: 34 | video_pattern = r'"vid"\s*:\s*' + uuid_pattern 35 | id = match1(html, video_pattern) 36 | assert id, "can't find video info" 37 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 38 | 39 | site_info = "ifeng.com" 40 | download = ifeng_download 41 | download_playlist = playlist_not_supported('ifeng') 42 | -------------------------------------------------------------------------------- /src/you_get/extractors/nicovideo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nicovideo_download'] 4 | 5 | from ..common import * 6 | 7 | def nicovideo_login(user, password): 8 | data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In" 9 | response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8'))) 10 | return response.headers 11 | 12 | def nicovideo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | import ssl 14 | ssl_context = request.HTTPSHandler( 15 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 16 | cookie_handler = request.HTTPCookieProcessor() 17 | opener = request.build_opener(ssl_context, cookie_handler) 18 | request.install_opener(opener) 19 | 20 | import netrc, getpass 21 | try: 22 | info = netrc.netrc().authenticators('nicovideo') 23 | except: 24 | info = None 25 | if info is None: 26 | user = input("User: ") 27 | password = getpass.getpass("Password: ") 28 | else: 29 | user, password = info[0], info[2] 30 | print("Logging in...") 31 | nicovideo_login(user, password) 32 | 33 | html = get_html(url) # necessary! 
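# A note on the credential lookup above: netrc.netrc().authenticators('nicovideo')
# reads the user's ~/.netrc and returns a (login, account, password) tuple for the
# machine entry named 'nicovideo', which is why info[0] and info[2] are unpacked
# as user and password. A matching ~/.netrc entry would look like this
# (the credentials shown are hypothetical):
#
#     machine nicovideo
#         login alice@example.com
#         password hunter2
#
# Without such an entry, the code falls back to the interactive prompts above.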
34 | title = r1(r'<title>(.+?)</title>', html) 35 | #title = unicodize(r1(r'<span class="videoHeaderTitle"[^>]*>([^<]+)</span>', html)) 36 | 37 | vid = url.split('/')[-1].split('?')[0] 38 | api_html = get_html('http://flapi.nicovideo.jp/api/getflv?v=%s' % vid) 39 | real_url = parse.unquote(r1(r'url=([^&]+)&', api_html)) 40 | 41 | type, ext, size = url_info(real_url) 42 | 43 | print_info(site_info, title, type, size) 44 | if not info_only: 45 | download_urls([real_url], title, ext, size, output_dir, merge = merge) 46 | 47 | site_info = "Nicovideo.jp" 48 | download = nicovideo_download 49 | download_playlist = playlist_not_supported('nicovideo') 50 | -------------------------------------------------------------------------------- /src/you_get/extractors/kakao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from .universal import * 5 | 6 | __all__ = ['kakao_download'] 7 | 8 | 9 | def kakao_download(url, output_dir='.', info_only=False, **kwargs): 10 | json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}' 11 | 12 | # playlists are not supported in this implementation, so strip the playlist part of the URL; 13 | # to add playlist support, this is the place to change 14 | if re.search('playlistId', url): 15 | url = re.search(r"(.+)\?.+?", url).group(1) 16 | 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"<meta name=\"vid\" content=\"(.+?)\"", page).group(1) 20 | title = re.search(r"<meta name=\"title\" content=\"(.+?)\"", page).group(1) 21 | 22 | meta_str = get_content(json_request_url.format(vid)) 23 | meta_json = json.loads(meta_str) 24 | 25 | standard_preset = meta_json['output_list']['standard_preset'] 26 | output_videos = meta_json['output_list']['output_list'] 27 | size = '' 28 | if meta_json['svcname'] == 'smr_pip': 29 | for v in output_videos: 30 | if v['preset'] == 'mp4_PIP_SMR_480P': 31 | size = int(v['filesize']) 32 | break 33 | else: 34 | for v in output_videos: 35 | if v['preset'] == standard_preset: 36 | size = int(v['filesize']) 37 | break 38 | 39 | video_url = meta_json['location']['url'] 40 | 41 | print_info(site_info, title, 'mp4', size) 42 | if not info_only: 43 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 44 | except: 45 | universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) 46 | 47 | 48 | site_info = "tv.kakao.com" 49 | download = kakao_download 50 | download_playlist = playlist_not_supported('kakao') 51 | -------------------------------------------------------------------------------- /src/you_get/json_output.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | 4 | # save info from common.print_info() 5 | last_info = None 6 | 7 | def output(video_extractor, pretty_print=True): 8 | ve = video_extractor 9 | out = {} 10 | out['url'] = ve.url 11 | out['title'] = ve.title 12 | out['site'] = ve.name 13 | out['streams'] = ve.streams 14 | try: 15 | if ve.dash_streams: 16 | out['streams'].update(ve.dash_streams) 17 | except AttributeError: 18 | pass 19 | try: 20 | if ve.audiolang: 21 | out['audiolang'] = ve.audiolang 22 | except AttributeError: 23 | pass 24 | extra = {} 25 | if getattr(ve, 'referer', None) is not None: 26 | extra["referer"] = ve.referer 27 | if getattr(ve, 'ua', None) is not None: 28 | extra["ua"] = ve.ua 29 | if extra: 30 | out["extra"] = extra 31 | if pretty_print: 32 | print(json.dumps(out, indent=4, ensure_ascii=False)) 33 | else: 34 | print(json.dumps(out)) 35 | 36 | # a fake VideoExtractor object to save info 37 | class
VideoExtractor(object): 38 | pass 39 | 40 | def print_info(site_info=None, title=None, type=None, size=None): 41 | global last_info 42 | # create a VideoExtractor and save info for download_urls() 43 | ve = VideoExtractor() 44 | last_info = ve 45 | ve.name = site_info 46 | ve.title = title 47 | ve.url = None 48 | 49 | def download_urls(urls=None, title=None, ext=None, total_size=None, refer=None): 50 | ve = last_info 51 | if not ve: 52 | ve = VideoExtractor() 53 | ve.name = '' 54 | ve.url = urls 55 | ve.title=title 56 | # save download info in streams 57 | stream = {} 58 | stream['container'] = ext 59 | stream['size'] = total_size 60 | stream['src'] = urls 61 | if refer: 62 | stream['refer'] = refer 63 | stream['video_profile'] = '__default__' 64 | ve.streams = {} 65 | ve.streams['__default__'] = stream 66 | output(ve) 67 | -------------------------------------------------------------------------------- /src/you_get/processor/rtmpdump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os.path 4 | import subprocess 5 | 6 | def get_usable_rtmpdump(cmd): 7 | try: 8 | p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 9 | out, err = p.communicate() 10 | return cmd 11 | except: 12 | return None 13 | 14 | RTMPDUMP = get_usable_rtmpdump('rtmpdump') 15 | 16 | def has_rtmpdump_installed(): 17 | return RTMPDUMP is not None 18 | 19 | # 20 | #params ={"-y":"playlist","-q":None,} 21 | #if Only Key ,Value should be None 22 | #-r -o should not be included in params 23 | 24 | def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'): 25 | filename = '%s.%s' % (title, ext) 26 | filepath = os.path.join(output_dir, filename) 27 | 28 | cmdline = [RTMPDUMP, '-r'] 29 | cmdline.append(url) 30 | cmdline.append('-o') 31 | cmdline.append(filepath) 32 | 33 | for key in params.keys(): 34 | cmdline.append(key) 35 | if params[key]!=None: 36 | cmdline.append(params[key]) 37 | 38 | # cmdline.append('-y') 39 | # cmdline.append(playpath) 40 | print("Call rtmpdump:\n"+" ".join(cmdline)+"\n") 41 | subprocess.call(cmdline) 42 | return 43 | 44 | # 45 | def play_rtmpdump_stream(player, url, params={}): 46 | 47 | #construct left side of pipe 48 | cmdline = [RTMPDUMP, '-r'] 49 | cmdline.append(url) 50 | 51 | #append other params if exist 52 | for key in params.keys(): 53 | cmdline.append(key) 54 | if params[key]!=None: 55 | cmdline.append(params[key]) 56 | 57 | cmdline.append('-o') 58 | cmdline.append('-') 59 | 60 | #pipe start 61 | cmdline.append('|') 62 | cmdline.append(player) 63 | cmdline.append('-') 64 | 65 | #logging 66 | print("Call rtmpdump:\n"+" ".join(cmdline)+"\n") 67 | 68 | #call RTMPDump! 69 | subprocess.call(cmdline) 70 | 71 | # os.system("rtmpdump -r '%s' -y '%s' -o - | %s -" % (url, playpath, player)) 72 | return 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | PROJ_NAME = 'you-get' 4 | PACKAGE_NAME = 'you_get' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import importlib.util 9 | import importlib.machinery 10 | 11 | def load_source(modname, filename): 12 | loader = importlib.machinery.SourceFileLoader(modname, filename) 13 | spec = importlib.util.spec_from_file_location(modname, filename, loader=loader) 14 | module = importlib.util.module_from_spec(spec) 15 | # The module is always executed and not cached in sys.modules. 
16 | # Uncomment the following line to cache the module. 17 | # sys.modules[module.__name__] = module 18 | loader.exec_module(module) 19 | return module 20 | 21 | import os, json 22 | here = os.path.abspath(os.path.dirname(__file__)) 23 | proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) 24 | try: 25 | README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() 26 | except: 27 | README = "" 28 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() 29 | VERSION = load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ 30 | 31 | from setuptools import setup, find_packages 32 | setup( 33 | name = proj_info['name'], 34 | version = VERSION, 35 | 36 | author = proj_info['author'], 37 | author_email = proj_info['author_email'], 38 | url = proj_info['url'], 39 | license = proj_info['license'], 40 | 41 | description = proj_info['description'], 42 | keywords = proj_info['keywords'], 43 | 44 | long_description = README, 45 | 46 | packages = find_packages('src'), 47 | package_dir = {'' : 'src'}, 48 | 49 | test_suite = 'tests', 50 | 51 | platforms = 'any', 52 | zip_safe = True, 53 | include_package_data = True, 54 | 55 | classifiers = proj_info['classifiers'], 56 | 57 | entry_points = {'console_scripts': proj_info['console_scripts']}, 58 | 59 | install_requires = ['dukpy'], 60 | extras_require = { 61 | 'socks': ['PySocks'], 62 | } 63 | ) 64 | -------------------------------------------------------------------------------- /src/you_get/extractors/tiktok.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tiktok_download'] 4 | 5 | from ..common import * 6 | 7 | def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 10 | 'Accept-Encoding': 'gzip, deflate', 11 | 'Accept': '*/*', 12 | 'Referer': 'https://www.tiktok.com/', 13 | 'Connection': 'keep-alive' # important 14 | } 15 | 16 | m = re.match('(https?://)?([^/]+)(/.*)', url) 17 | host = m.group(2) 18 | if host != 'www.tiktok.com': # non-canonical URL 19 | if host == 'vt.tiktok.com': # short URL 20 | url = get_location(url) 21 | vid = r1(r'/video/(\d+)', url) 22 | url = 'https://www.tiktok.com/@/video/%s/' % vid 23 | host = 'www.tiktok.com' 24 | else: 25 | url = m.group(3).split('?')[0] 26 | vid = url.split('/')[3] # should be a string of numbers 27 | 28 | html, set_cookie = getHttps(host, url, headers=headers) 29 | tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie) 30 | headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token 31 | 32 | data = r1(r'<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application/json">(.*?)</script>', html) 33 | info = json.loads(data) 34 | itemStruct = info['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct'] 35 | downloadAddr = itemStruct['video']['downloadAddr'] 36 | author = itemStruct['author']['uniqueId'] 37 | nickname = itemStruct['author']['nickname'] 38 | title = '%s [%s]' % (nickname or author, vid) 39 | 40 | mime, ext, size = url_info(downloadAddr, headers=headers) 41 | 42 | print_info(site_info, title, mime, size) 43 | if not info_only: 44 | download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) 45 | 46 | site_info = "TikTok.com" 47 | download = tiktok_download 48 | download_playlist = playlist_not_supported('tiktok') 49 |
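The itemStruct lookups in tiktok_download above are easier to follow against a skeleton of the embedded rehydration JSON. A minimal sketch with a made-up payload (field values are placeholders, not real TikTok data; only the keys the extractor actually reads are shown):

    import json

    sample = json.loads('''{
      "__DEFAULT_SCOPE__": {
        "webapp.video-detail": {
          "itemInfo": {
            "itemStruct": {
              "video": {"downloadAddr": "https://example.com/v.mp4"},
              "author": {"uniqueId": "someuser", "nickname": "Some User"}
            }
          }
        }
      }
    }''')

    itemStruct = sample['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct']
    print(itemStruct['video']['downloadAddr'])  # https://example.com/v.mp4
    print(itemStruct['author']['nickname'])     # Some User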
-------------------------------------------------------------------------------- /src/you_get/extractors/infoq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import ssl 7 | 8 | class Infoq(VideoExtractor): 9 | name = "InfoQ" 10 | 11 | stream_types = [ 12 | {'id': 'video'}, 13 | {'id': 'audio'}, 14 | {'id': 'slides'} 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | content = get_content(self.url) 19 | self.title = match1(content, r'([^<]+)') 20 | s = match1(content, r'P\.s\s*=\s*\'([^\']+)\'') 21 | scp = match1(content, r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'') 22 | scs = match1(content, r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'') 23 | sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'') 24 | 25 | mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"') 26 | if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 27 | 28 | pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"') 29 | if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf 30 | 31 | # cookie handler 32 | ssl_context = request.HTTPSHandler( 33 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 34 | cookie_handler = request.HTTPCookieProcessor() 35 | opener = request.build_opener(ssl_context, cookie_handler) 36 | opener.addheaders = [ 37 | ('Referer', self.url), 38 | ('Cookie', 39 | 'CloudFront-Policy=%s;CloudFront-Signature=%s;CloudFront-Key-Pair-Id=%s' % (scp, scs, sck)) 40 | ] 41 | request.install_opener(opener) 42 | 43 | if s: self.streams['video'] = {'url': s } 44 | if mp3: self.streams['audio'] = { 'url': mp3 } 45 | if pdf: self.streams['slides'] = { 'url': pdf } 46 | 47 | def extract(self, **kwargs): 48 | for i in self.streams: 49 | s = self.streams[i] 50 | _, s['container'], s['size'] = url_info(s['url']) 51 | s['src'] = [s['url']] 52 | 53 | site = Infoq() 54 | download = site.download_by_url 55 | download_playlist = site.download_by_url 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/zhibo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['zhibo_download'] 4 | 5 | from ..common import * 6 | 7 | def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | # http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html 9 | 10 | html = get_html(url) 11 | title = r1(r'([\s\S]*)', html) 12 | total_size = 0 13 | part_urls= [] 14 | 15 | video_html = r1(r'', html) 16 | 17 | # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) 18 | video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) 19 | part_urls.append(video_url) 20 | ext = video_url.split('.')[-1] 21 | 22 | print_info(site_info, title, ext, total_size) 23 | if not info_only: 24 | download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge) 25 | 26 | 27 | def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if 'video.zhibo.tv' in url: 29 | zhibo_vedio_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 30 | return 31 | 32 | # if 'v.zhibo.tv' in url: 33 | # http://v.zhibo.tv/31609372 34 | html = get_html(url) 35 | title = r1(r'([\s\S]*)', html) 36 | is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", 
html) 37 | if is_live != "1": 38 | raise ValueError("The live stream is not online! (Errno:%s)" % is_live) 39 | 40 | match = re.search(r""" 41 | ourStreamName .*? 42 | '(.*?)' .*? 43 | rtmpHighSource .*? 44 | '(.*?)' .*? 45 | '(.*?)' 46 | """, html, re.S | re.X) 47 | real_url = match.group(3) + match.group(1) + match.group(2) 48 | 49 | print_info(site_info, title, 'flv', float('inf')) 50 | if not info_only: 51 | download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) 52 | 53 | site_info = "zhibo.tv" 54 | download = zhibo_download 55 | download_playlist = playlist_not_supported('zhibo') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/nanagogo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nanagogo_download'] 4 | 5 | from ..common import * 6 | from .universal import * 7 | 8 | def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | if re.match(r'https?://stat.7gogo.jp', url): 10 | universal_download(url, output_dir, merge=merge, info_only=info_only) 11 | return 12 | 13 | talk_id = r1(r'7gogo.jp/([^/]+)/', url) 14 | post_id = r1(r'7gogo.jp/[^/]+/(\d+)', url) 15 | title = '%s_%s' % (talk_id, post_id) 16 | api_url = 'https://api.7gogo.jp/web/v2/talks/%s/posts/%s' % (talk_id, post_id) 17 | info = json.loads(get_content(api_url)) 18 | 19 | items = [] 20 | if info['data']['posts']['post'] is None: 21 | return 22 | if info['data']['posts']['post']['body'] is None: 23 | return 24 | for i in info['data']['posts']['post']['body']: 25 | if 'image' in i: 26 | image_url = i['image'] 27 | if image_url[:2] == '//': continue # skip stamp images 28 | _, ext, size = url_info(image_url) 29 | items.append({'title': title, 30 | 'url': image_url, 31 | 'ext': ext, 32 | 'size': size}) 33 | elif 'movieUrlHq' in i: 34 | movie_url = i['movieUrlHq'] 35 | _, ext, size = url_info(movie_url) 36 | items.append({'title': title, 37 | 'url': movie_url, 38 | 'ext': ext, 39 | 'size': size}) 40 | 41 | size = sum([i['size'] for i in items]) 42 | if size == 0: return # do not fail the whole process 43 | print_info(site_info, title, ext, size) 44 | if not info_only: 45 | for i in items: 46 | print_info(site_info, i['title'], i['ext'], i['size']) 47 | download_urls([i['url']], i['title'], i['ext'], i['size'], 48 | output_dir=output_dir, 49 | merge=merge) 50 | 51 | site_info = "7gogo.jp" 52 | download = nanagogo_download 53 | download_playlist = playlist_not_supported('nanagogo') 54 | -------------------------------------------------------------------------------- /src/you_get/extractors/miomio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['miomio_download'] 4 | 5 | from ..common import * 6 | 7 | from .tudou import tudou_download_by_id 8 | from .youku import youku_download_by_vid 9 | from xml.dom.minidom import parseString 10 | 11 | def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 12 | html = get_html(url) 13 | 14 | title = r1(r'list 43 | Convert XML to URL List. 44 | From Biligrab. 
45 | """ 46 | rawurl = [] 47 | dom = parseString(xml_data) 48 | for node in dom.getElementsByTagName('durl'): 49 | url = node.getElementsByTagName('url')[0] 50 | rawurl.append(url.childNodes[0].data) 51 | return rawurl 52 | 53 | site_info = "MioMio.tv" 54 | download = miomio_download 55 | download_playlist = playlist_not_supported('miomio') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/douyin.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import json 4 | 5 | from ..common import ( 6 | url_size, 7 | print_info, 8 | get_content, 9 | fake_headers, 10 | download_urls, 11 | playlist_not_supported, 12 | match1, 13 | get_location, 14 | ) 15 | 16 | __all__ = ['douyin_download_by_url'] 17 | 18 | 19 | def get_value(source: dict, path): 20 | try: 21 | value = source 22 | for key in path: 23 | if type(key) is str: 24 | if key in value.keys(): 25 | value = value[key] 26 | else: 27 | value = None 28 | break 29 | elif type(key) is int: 30 | if len(value) != 0: 31 | value = value[key] 32 | else: 33 | value = None 34 | break 35 | except: 36 | value = None 37 | return value 38 | 39 | 40 | def douyin_download_by_url(url, **kwargs): 41 | # if short link, get the real url 42 | if 'v.douyin.com' in url: 43 | url = get_location(url) 44 | aweme_id = match1(url, r'/(\d+)/?') 45 | # get video info 46 | video_info_api = 'https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids={}' 47 | url = video_info_api.format(aweme_id) 48 | page_content = get_content(url, headers=fake_headers) 49 | video_info = json.loads(page_content) 50 | 51 | # get video id and title 52 | video_id = get_value(video_info, ['item_list', 0, 'video', 'vid']) 53 | title = get_value(video_info, ['item_list', 0, 'desc']) 54 | 55 | # get video play url 56 | video_url = "https://aweme.snssdk.com/aweme/v1/play/?ratio=720p&line=0&video_id={}".format(video_id) 57 | video_format = 'mp4' 58 | size = url_size(video_url, faker=True) 59 | print_info( 60 | site_info='douyin.com', title=title, 61 | type=video_format, size=size 62 | ) 63 | if not kwargs['info_only']: 64 | download_urls( 65 | urls=[video_url], title=title, ext=video_format, total_size=size, 66 | faker=True, 67 | **kwargs 68 | ) 69 | 70 | 71 | download = douyin_download_by_url 72 | download_playlist = playlist_not_supported('douyin') 73 | -------------------------------------------------------------------------------- /src/you_get/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .acfun import * 4 | from .alive import * 5 | from .archive import * 6 | from .baidu import * 7 | from .bandcamp import * 8 | from .bigthink import * 9 | from .bilibili import * 10 | from .bokecc import * 11 | from .cbs import * 12 | from .ckplayer import * 13 | from .cntv import * 14 | from .coub import * 15 | from .dailymotion import * 16 | from .douban import * 17 | from .douyin import * 18 | from .douyutv import * 19 | from .ehow import * 20 | from .facebook import * 21 | from .fc2video import * 22 | from .flickr import * 23 | from .freesound import * 24 | from .funshion import * 25 | from .google import * 26 | from .heavymusic import * 27 | from .icourses import * 28 | from .ifeng import * 29 | from .imgur import * 30 | from .infoq import * 31 | from .instagram import * 32 | from .interest import * 33 | from .iqilu import * 34 | from .iqiyi import * 35 | from .joy import * 36 | from .khan import * 
37 | from .ku6 import * 38 | from .kakao import * 39 | from .kuaishou import * 40 | from .kugou import * 41 | from .kuwo import * 42 | from .le import * 43 | from .lizhi import * 44 | from .longzhu import * 45 | from .magisto import * 46 | from .metacafe import * 47 | from .mgtv import * 48 | from .miaopai import * 49 | from .miomio import * 50 | from .mixcloud import * 51 | from .mtv81 import * 52 | from .nanagogo import * 53 | from .naver import * 54 | from .netease import * 55 | from .nicovideo import * 56 | from .pinterest import * 57 | from .pixnet import * 58 | from .pptv import * 59 | from .qie import * 60 | from .qingting import * 61 | from .qq import * 62 | from .showroom import * 63 | from .sina import * 64 | from .sohu import * 65 | from .soundcloud import * 66 | from .suntv import * 67 | from .ted import * 68 | from .theplatform import * 69 | from .tiktok import * 70 | from .tucao import * 71 | from .tudou import * 72 | from .tumblr import * 73 | from .twitter import * 74 | from .ucas import * 75 | from .veoh import * 76 | from .vimeo import * 77 | from .vk import * 78 | from .w56 import * 79 | from .wanmen import * 80 | from .xinpianchang import * 81 | from .yixia import * 82 | from .youku import * 83 | from .youtube import * 84 | from .zhanqi import * 85 | from .zhibo import * 86 | from .zhihu import * 87 | -------------------------------------------------------------------------------- /src/you_get/extractors/iwara.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | __all__ = ['iwara_download'] 3 | from ..common import * 4 | headers = { 5 | 'DNT': '1', 6 | 'Accept-Encoding': 'gzip, deflate, sdch, br', 7 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 8 | 'Upgrade-Insecure-Requests': '1', 9 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36', 10 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 11 | 'Cache-Control': 'max-age=0', 12 | 'Connection': 'keep-alive', 13 | 'Save-Data': 'on', 14 | 'Cookie':'has_js=1;show_adult=1', 15 | } 16 | stream_types = [ 17 | {'id': 'Source', 'container': 'mp4', 'video_profile': '原始'}, 18 | {'id': '540p', 'container': 'mp4', 'video_profile': '540p'}, 19 | {'id': '360p', 'container': 'mp4', 'video_profile': '360P'}, 20 | ] 21 | def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 22 | global headers 23 | video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)') 24 | video_url = match1(url, r'(https?://\w+.iwara.tv)/videos/\w+') 25 | html = get_content(url, headers=headers) 26 | title = r1(r'<title>(.*)</title>', html) 27 | api_url = video_url + '/api/video/' + video_hash 28 | content = get_content(api_url, headers=headers) 29 | data = json.loads(content) 30 | if len(data) < 1: 31 | print('Maybe this is a private video? ' + '[' + title + ']') 32 | return True 33 | down_urls = 'https:' + data[0]['uri'] 34 | type, ext, size = url_info(down_urls, headers=headers) 35 | print_info(site_info, title+data[0]['resolution'], type, size) 36 | 37 | if not info_only: 38 | download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) 39 | 40 | def download_playlist_by_url(url, **kwargs): 41 | video_page = get_html(url) 42 | url_first=match1(url, r"(http[s]?://[^/]+)") 43 | videos = set(re.findall(r'<a href="(/videos/\w+)"', video_page)) 44 | if len(videos) > 0: 45 | for video in videos: 46 | iwara_download(url_first+video, **kwargs) 47 | else: 48 | maybe_print('this page not found
any videos') 49 | site_info = "Iwara" 50 | download = iwara_download 51 | download_playlist = download_playlist_by_url 52 | -------------------------------------------------------------------------------- /src/you_get/extractors/zhanqi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['zhanqi_download'] 4 | 5 | from ..common import * 6 | import json 7 | import base64 8 | from urllib.parse import urlparse 9 | 10 | def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 11 | path = urlparse(url).path[1:] 12 | 13 | if not (path.startswith('videos') or path.startswith('v2/videos')): #url = "https://www.zhanqi.tv/huashan?param_s=1_0.2.0" 14 | path_list = path.split('/') 15 | room_id = path_list[1] if path_list[0] == 'topic' else path_list[0] 16 | zhanqi_live(room_id, merge=merge, output_dir=output_dir, info_only=info_only, **kwargs) 17 | else: #url = 'https://www.zhanqi.tv/videos/Lyingman/2017/01/182308.html' 18 | # https://www.zhanqi.tv/v2/videos/215593.html 19 | video_id = path.split('.')[0].split('/')[-1] 20 | zhanqi_video(video_id, merge=merge, output_dir=output_dir, info_only=info_only, **kwargs) 21 | 22 | def zhanqi_live(room_id, merge=True, output_dir='.', info_only=False, **kwargs): 23 | api_url = "https://www.zhanqi.tv/api/static/v2.1/room/domain/{}.json".format(room_id) 24 | json_data = json.loads(get_content(api_url))['data'] 25 | status = json_data['status'] 26 | if status != '4': 27 | raise Exception("The live stream is not online!") 28 | 29 | nickname = json_data['nickname'] 30 | title = nickname + ": " + json_data['title'] 31 | video_levels = base64.b64decode(json_data['flashvars']['VideoLevels']).decode('utf8') 32 | m3u8_url = json.loads(video_levels)['streamUrl'] 33 | 34 | print_info(site_info, title, 'm3u8', 0, m3u8_url=m3u8_url, m3u8_type='master') 35 | if not info_only: 36 | download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge) 37 | 38 | def zhanqi_video(video_id, output_dir='.', info_only=False, merge=True, **kwargs): 39 | api_url = 'https://www.zhanqi.tv/api/static/v2.1/video/{}.json'.format(video_id) 40 | json_data = json.loads(get_content(api_url))['data'] 41 | 42 | title = json_data['title'] 43 | vid = json_data['flashvars']['VideoID'] 44 | m3u8_url = 'http://dlvod.cdn.zhanqi.tv/' + vid 45 | urls = general_m3u8_extractor(m3u8_url) 46 | print_info(site_info, title, 'm3u8', 0) 47 | if not info_only: 48 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 49 | 50 | site_info = "www.zhanqi.tv" 51 | download = zhanqi_download 52 | download_playlist = playlist_not_supported('zhanqi') 53 | -------------------------------------------------------------------------------- /src/you_get/extractors/douban.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['douban_download'] 4 | 5 | import urllib.request, urllib.parse 6 | from ..common import * 7 | 8 | def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | html = get_html(url) 10 | 11 | if re.match(r'https?://movie', url): 12 | title = match1(html, 'name="description" content="([^"]+)') 13 | tid = match1(url, r'trailer/(\d+)') 14 | real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid 15 | type, ext, size = url_info(real_url) 16 | 17 | print_info(site_info, title, type, size) 18 | if not info_only: 19 | download_urls([real_url], title, ext, 
size, output_dir, merge = merge) 20 | 21 | elif 'subject' in url: 22 | titles = re.findall(r'data-title="([^"]*)">', html) 23 | song_id = re.findall(r'
`You-Get <https://you-get.org/>`__ is a tiny command-line utility to 7 | download media contents (videos, audios, images) from the Web, in case 8 | there is no other handy way to do it. 9 | 10 | Here's how you use ``you-get`` to download a video from `this web 11 | page <http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society>`__: 12 | 13 | .. code:: console 14 | 15 | $ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society 16 | Site: fsf.org 17 | Title: TEDxGE2014_Stallman05_LQ 18 | Type: WebM video (video/webm) 19 | Size: 27.12 MiB (28435804 Bytes) 20 | 21 | Downloading TEDxGE2014_Stallman05_LQ.webm ... 22 | 100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1] 12 MB/s 23 | 24 | And here's why you might want to use it: 25 | 26 | - You enjoyed something on the Internet, and just want to download it 27 | for your own pleasure. 28 | - You watch your favorite videos online from your computer, but you are 29 | prohibited from saving them. You feel that you have no control over 30 | your own computer. (And it's not how an open Web is supposed to 31 | work.) 32 | - You want to get rid of any closed-source technology or proprietary 33 | JavaScript code, and disallow things like Flash running on your 34 | computer. 35 | - You are an adherent of hacker culture and free software. 36 | 37 | What ``you-get`` can do for you: 38 | 39 | - Download videos / audios from popular websites such as YouTube, 40 | Youku, Niconico, and a bunch more. (See the `full list of supported 41 | sites <#supported-sites>`__) 42 | - Stream an online video in your media player. No web browser, no more 43 | ads. 44 | - Download images (of interest) by scraping a web page. 45 | - Download arbitrary non-HTML contents, i.e., binary files. 46 | 47 | Interested? `Install it <#installation>`__ now and `get started by 48 | examples <#getting-started>`__. 49 | 50 | Are you a Python programmer? Then check out `the 51 | source <https://github.com/soimort/you-get>`__ and fork it! 52 | 53 | .. |PyPI version| image:: https://badge.fury.io/py/you-get.png 54 | :target: http://badge.fury.io/py/you-get 55 | .. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg 56 | :target: https://github.com/soimort/you-get/actions 57 | ..
|Gitter| image:: https://badges.gitter.im/Join%20Chat.svg 58 | :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 59 | -------------------------------------------------------------------------------- /src/you_get/extractors/fc2video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['fc2video_download'] 4 | 5 | from ..common import * 6 | from hashlib import md5 7 | from urllib.parse import urlparse 8 | 9 | #---------------------------------------------------------------------- 10 | def makeMimi(upid): 11 | """From http://cdn37.atwikiimg.com/sitescript/pub/dksitescript/FC2.site.js 12 | Also com.hps.util.fc2.FC2EncrptUtil.makeMimiLocal 13 | L110""" 14 | strSeed = "gGddgPfeaf_gzyr" 15 | prehash = upid + "_" + strSeed 16 | return md5(prehash.encode('utf-8')).hexdigest() 17 | 18 | #---------------------------------------------------------------------- 19 | def fc2video_download_by_upid(upid, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | """""" 21 | fake_headers = { 22 | 'DNT': '1', 23 | 'Accept-Encoding': 'gzip, deflate, sdch', 24 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 25 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.58 Safari/537.36', 26 | 'Accept': '*/*', 27 | 'X-Requested-With': 'ShockwaveFlash/19.0.0.245', 28 | 'Connection': 'keep-alive', 29 | } 30 | api_base = 'http://video.fc2.com/ginfo.php?upid={upid}&mimi={mimi}'.format(upid = upid, mimi = makeMimi(upid)) 31 | html = get_content(api_base, headers=fake_headers) 32 | 33 | video_url = match1(html, r'filepath=(.+)&sec') 34 | video_url = video_url.replace('&mid', '?mid') 35 | 36 | title = match1(html, r'&title=([^&]+)') 37 | 38 | type, ext, size = url_info(video_url, headers=fake_headers) 39 | 40 | print_info(site_info, title, type, size) 41 | if not info_only: 42 | download_urls([video_url], title, ext, size, output_dir, merge=merge, headers = fake_headers) 43 | 44 | #---------------------------------------------------------------------- 45 | def fc2video_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 46 | """wrapper""" 47 | #'http://video.fc2.com/en/content/20151021bTVKnbEw' 48 | #'http://xiaojiadianvideo.asia/content/20151021bTVKnbEw' 49 | #'http://video.fc2.com/ja/content/20151021bTVKnbEw' 50 | #'http://video.fc2.com/tw/content/20151021bTVKnbEw' 51 | hostname = urlparse(url).hostname 52 | if not ('fc2.com' in hostname or 'xiaojiadianvideo.asia' in hostname): 53 | return False 54 | upid = match1(url, r'.+/content/(\w+)') 55 | 56 | fc2video_download_by_upid(upid, output_dir, merge, info_only) 57 | 58 | site_info = "FC2Video" 59 | download = fc2video_download 60 | download_playlist = playlist_not_supported('fc2video') 61 | -------------------------------------------------------------------------------- /src/you_get/extractors/vk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['vk_download'] 4 | 5 | from ..common import * 6 | 7 | 8 | def get_video_info(url): 9 | video_page = get_content(url) 10 | title = r1(r'
<div class="mv_title".+?>(.[^>]+?)</div>', video_page) 11 | sources = re.findall(r'<source src="([^"]+?)"', video_page) 12 | 13 | for quality in ['.1080.', '.720.', '.480.', '.360.', '.240.']: 14 | for source in sources: 15 | if source.find(quality) != -1: 16 | url = source 17 | break 18 | assert url 19 | type, ext, size = url_info(url) 20 | print_info(site_info, title, type, size) 21 | 22 | return url, title, ext, size 23 | 24 | 25 | def get_video_from_user_videolist(url): 26 | ep = 'https://vk.com/al_video.php' 27 | to_post = dict(act='show', al=1, module='direct', video=re.search(r'video(\d+_\d+)', url).group(1)) 28 | page = post_content(ep, post_data=to_post) 29 | video_pt = r'<source src="(.+?)" type="video/mp4">' 30 | url = re.search(video_pt, page).group(1) 31 | title = re.search(r'<div class="mv_title".+?>(.+?)</div>', page).group(1) 32 | mime, ext, size = url_info(url) 33 | print_info(site_info, title, mime, size) 34 | 35 | return url, title, ext, size 36 | 37 | 38 | def get_image_info(url): 39 | image_page = get_content(url) 40 | # used for title - vk page owner 41 | page_of = re.findall(r'Sender:
    (.[^>]+?)(.[^>]+?)Download full size', image_page) 47 | type, ext, size = url_info(image_link) 48 | print_info(site_info, title, type, size) 49 | 50 | return image_link, title, ext, size 51 | 52 | 53 | def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): 54 | link = None 55 | if re.match(r'(.+)z\=video(.+)', url): 56 | link, title, ext, size = get_video_info(url) 57 | elif re.match(r'(.+)vk\.com\/photo(.+)', url): 58 | link, title, ext, size = get_image_info(url) 59 | elif re.search(r'vk\.com\/video\d+_\d+', url): 60 | link, title, ext, size = get_video_from_user_videolist(url) 61 | else: 62 | raise NotImplementedError('Nothing to download here') 63 | 64 | if not info_only and link is not None: 65 | download_urls([link], title, ext, size, output_dir, merge=merge) 66 | 67 | 68 | site_info = "VK.com" 69 | download = vk_download 70 | download_playlist = playlist_not_supported('vk') 71 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.extractors import ( 6 | imgur, 7 | magisto, 8 | youtube, 9 | missevan, 10 | acfun, 11 | bilibili, 12 | soundcloud, 13 | tiktok, 14 | twitter, 15 | miaopai 16 | ) 17 | 18 | 19 | class YouGetTests(unittest.TestCase): 20 | def test_imgur(self): 21 | imgur.download('http://imgur.com/WVLk5nD', info_only=True) 22 | imgur.download('https://imgur.com/we-should-have-listened-WVLk5nD', info_only=True) 23 | 24 | def test_magisto(self): 25 | magisto.download( 26 | 'http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA', 27 | info_only=True 28 | ) 29 | 30 | #def test_youtube(self): 31 | #youtube.download( 32 | # 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True 33 | #) 34 | #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) 35 | #youtube.download( 36 | # 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa 37 | # info_only=True 38 | #) 39 | #youtube.download( 40 | # 'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True 41 | #) 42 | 43 | def test_acfun(self): 44 | acfun.download('https://www.acfun.cn/v/ac44560432', info_only=True) 45 | 46 | #def test_bilibili(self): 47 | #bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True) 48 | 49 | #def test_soundcloud(self): 50 | ## single song 51 | #soundcloud.download( 52 | # 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True 53 | #) 54 | ## playlist 55 | #soundcloud.download( 56 | # 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True 57 | #) 58 | 59 | def test_tiktok(self): 60 | tiktok.download('https://www.tiktok.com/@zukky_48/video/7398162058153315605', info_only=True) 61 | tiktok.download('https://www.tiktok.com/@/video/7398162058153315605', info_only=True) 62 | tiktok.download('https://t.tiktok.com/i18n/share/video/7398162058153315605/', info_only=True) 63 | tiktok.download('https://vt.tiktok.com/ZSYKjKt6M/', info_only=True) 64 | 65 | def test_twitter(self): 66 | twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True) 67 | twitter.download('https://x.com/elonmusk/status/1530516552084234244', info_only=True) 68 | 69 | def test_weibo(self): 70 | miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True) 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | 
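All of the cases above are network-backed smoke tests: each one only asks an extractor to resolve metadata (info_only=True), so nothing is written to disk. To exercise a single case without running the whole suite, the standard unittest loader works; a small driver sketch (assumes it is run from the repository root, so that the tests package is importable):

    import unittest

    # Load and run only the TikTok smoke test defined in tests/test.py.
    suite = unittest.defaultTestLoader.loadTestsFromName('tests.test.YouGetTests.test_tiktok')
    unittest.TextTestRunner(verbosity=2).run(suite)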
-------------------------------------------------------------------------------- /src/you_get/extractors/bigthink.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import json 7 | 8 | class Bigthink(VideoExtractor): 9 | name = "Bigthink" 10 | 11 | stream_types = [ #this is just a sample. Will make it in prepare() 12 | # {'id': '1080'}, 13 | # {'id': '720'}, 14 | # {'id': '360'}, 15 | # {'id': '288'}, 16 | # {'id': '190'}, 17 | # {'id': '180'}, 18 | 19 | ] 20 | 21 | @staticmethod 22 | def get_streams_by_id(account_number, video_id): 23 | """ 24 | int, int->list 25 | 26 | Get the height of the videos. 27 | 28 | Since brightcove is using 3 kinds of links: rtmp, http and https, 29 | we will be using the HTTPS one to make it secure. 30 | 31 | If somehow akamaihd.net is blocked by the Great Fucking Wall, 32 | change the "startswith https" to http. 33 | """ 34 | endpoint = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_number}/videos/{video_id}'.format(account_number = account_number, video_id = video_id) 35 | fake_header_id = fake_headers 36 | #is this somehow related to the time? Magic.... 37 | fake_header_id['Accept'] ='application/json;pk=BCpkADawqM1cc6wmJQC2tvoXZt4mrB7bFfi6zGt9QnOzprPZcGLE9OMGJwspQwKfuFYuCjAAJ53JdjI8zGFx1ll4rxhYJ255AXH1BQ10rnm34weknpfG-sippyQ' 38 | 39 | html = get_content(endpoint, headers= fake_header_id) 40 | html_json = json.loads(html) 41 | 42 | link_list = [] 43 | 44 | for i in html_json['sources']: 45 | if 'src' in i: #to avoid KeyError 46 | if i['src'].startswith('https'): 47 | link_list.append((str(i['height']), i['src'])) 48 | 49 | return link_list 50 | 51 | def prepare(self, **kwargs): 52 | 53 | html = get_content(self.url) 54 | 55 | self.title = match1(html, r'', home_page)[-1] 13 | 14 | client_id = get_content(js_url) 15 | return re.search(r'client_id:"(.+?)"', client_id).group(1) 16 | 17 | 18 | def get_resource_info(resource_url, client_id): 19 | cont = get_content(resource_url, decoded=True) 20 | 21 | x = re.escape('forEach(function(e){n(e)})}catch(e){}})},') 22 | x = re.search(r'' + x + r'(.*)\);', cont) 23 | 24 | info = json.loads(x.group(1))[-1]['data'][0] 25 | 26 | info = info['tracks'] if info.get('track_count') else [info] 27 | 28 | ids = [i['id'] for i in info if i.get('comment_count') is None] 29 | ids = list(map(str, ids)) 30 | ids_split = ['%2C'.join(ids[i:i+10]) for i in range(0, len(ids), 10)] 31 | api_url = 'https://api-v2.soundcloud.com/tracks?ids={ids}&client_id={client_id}&%5Bobject%20Object%5D=&app_version=1584348206&app_locale=en' 32 | 33 | res = [] 34 | for ids in ids_split: 35 | uri = api_url.format(ids=ids, client_id=client_id) 36 | cont = get_content(uri, decoded=True) 37 | res += json.loads(cont) 38 | 39 | res = iter(res) 40 | info = [next(res) if i.get('comment_count') is None else i for i in info] 41 | 42 | return info 43 | 44 | 45 | def sndcd_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 46 | client_id = get_sndcd_apikey() 47 | 48 | r_info = get_resource_info(url, client_id) 49 | 50 | for info in r_info: 51 | title = info['title'] 52 | metadata = info.get('publisher_metadata') 53 | 54 | transcodings = info['media']['transcodings'] 55 | sq = [i for i in transcodings if i['quality'] == 'sq'] 56 | hq = [i for i in transcodings if i['quality'] == 'hq'] 57 | # source url 58 | surl = sq[0] if hq == [] else hq[0] 59 | surl = surl['url'] 60 | 61 | uri = surl + 
'?client_id=' + client_id 62 | r = get_content(uri) 63 | surl = json.loads(r)['url'] 64 | 65 | m3u8 = get_content(surl) 66 | # url list 67 | urll = re.findall(r'http.*?(?=\n)', m3u8) 68 | 69 | size = urls_size(urll) 70 | print_info(site_info, title, 'audio/mpeg', size) 71 | print(end='', flush=True) 72 | 73 | if not info_only: 74 | download_urls(urll, title=title, ext='mp3', total_size=size, output_dir=output_dir, merge=True) 75 | 76 | 77 | site_info = "SoundCloud.com" 78 | download = sndcd_download 79 | download_playlist = sndcd_download 80 | -------------------------------------------------------------------------------- /src/you_get/extractors/instagram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['instagram_download'] 4 | 5 | from ..common import * 6 | 7 | def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87', 10 | 'sec-fetch-mode': 'navigate' # important 11 | } 12 | 13 | url = r1(r'([^?]*)', url) 14 | cont = get_content(url, headers=headers) 15 | 16 | vid = r1(r'instagram.com/\w+/([^/]+)', url) 17 | description = r1(r'([^<]*)', cont) # with logged-in cookies 19 | title = "{} [{}]".format(description.replace("\n", " "), vid) 20 | 21 | appId = r1(r'"appId":"(\d+)"', cont) 22 | media_id = r1(r'"media_id":"(\d+)"', cont) 23 | logging.debug('appId: %s' % appId) 24 | logging.debug('media_id: %s' % media_id) 25 | 26 | api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id 27 | try: 28 | api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) 29 | post = json.loads(api_cont) 30 | except: 31 | log.wtf('[Error] Please specify a cookie file.') 32 | 33 | for item in post['items']: 34 | code = item['code'] 35 | carousel_media = item.get('carousel_media') or [item] 36 | for i, media in enumerate(carousel_media): 37 | title = '%s [%s]' % (code, i) 38 | image_url = media['image_versions2']['candidates'][0]['url'] 39 | ext = image_url.split('?')[0].split('.')[-1] 40 | size = int(get_head(image_url)['Content-Length']) 41 | 42 | print_info(site_info, title, ext, size) 43 | if not info_only: 44 | download_urls(urls=[image_url], 45 | title=title, 46 | ext=ext, 47 | total_size=size, 48 | output_dir=output_dir) 49 | 50 | # download videos (if any) 51 | if 'video_versions' in media: 52 | video_url = media['video_versions'][0]['url'] 53 | ext = video_url.split('?')[0].split('.')[-1] 54 | size = int(get_head(video_url)['Content-Length']) 55 | 56 | print_info(site_info, title, ext, size) 57 | if not info_only: 58 | download_urls(urls=[video_url], 59 | title=title, 60 | ext=ext, 61 | total_size=size, 62 | output_dir=output_dir) 63 | 64 | site_info = "Instagram.com" 65 | download = instagram_download 66 | download_playlist = playlist_not_supported('instagram') 67 | -------------------------------------------------------------------------------- /src/you_get/extractors/lizhi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['lizhi_download'] 4 | import json 5 | import datetime 6 | from ..common import * 7 | 8 | # 9 | # Worked well but not perfect. 
10 | # TODO: add option --format={sd|hd} 11 | # 12 | def get_url(ep): 13 | readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d') 14 | return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id']) 15 | 16 | # radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ 17 | # 18 | # Returns a list of tuples (audio_id, title, url) for each episode 19 | # (audio) in the radio playlist. url is the direct link to the audio 20 | # file. 21 | def lizhi_extract_playlist_info(radio_id): 22 | # /api/radio_audios API parameters: 23 | # 24 | # - s: starting episode 25 | # - l: count (per page) 26 | # - band: radio_id 27 | # 28 | # We use l=65535 for poor man's pagination (that is, no pagination 29 | # at all -- hope all fits on a single page). 30 | # 31 | # TODO: Use /api/radio?band={radio_id} to get number of episodes 32 | # (au_cnt), then handle pagination properly. 33 | api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id 34 | api_response = json.loads(get_content(api_url)) 35 | return [(ep['id'], ep['name'], get_url(ep)) for ep in api_response] 36 | 37 | def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): 38 | filetype, ext, size = url_info(url) 39 | print_info(site_info, title, filetype, size) 40 | if not info_only: 41 | download_urls([url], title, ext, size, output_dir=output_dir) 42 | 43 | def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs): 44 | # Sample URL: http://www.lizhi.fm/549759/ 45 | radio_id = match1(url,r'/(\d+)') 46 | if not radio_id: 47 | raise NotImplementedError('%s not supported' % url) 48 | for audio_id, title, url in lizhi_extract_playlist_info(radio_id): 49 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 50 | 51 | def lizhi_download(url, output_dir='.', info_only=False, **kwargs): 52 | # Sample URL: http://www.lizhi.fm/549759/18864883431656710/ 53 | m = re.search(r'/(?P\d+)/(?P\d+)', url) 54 | if not m: 55 | raise NotImplementedError('%s not supported' % url) 56 | radio_id = m.group('radio_id') 57 | audio_id = m.group('audio_id') 58 | # Look for the audio_id among the full list of episodes 59 | for aid, title, url in lizhi_extract_playlist_info(radio_id): 60 | if aid == audio_id: 61 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 62 | break 63 | else: 64 | raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id)) 65 | 66 | site_info = "lizhi.fm" 67 | download = lizhi_download 68 | download_playlist = lizhi_download_playlist 69 | -------------------------------------------------------------------------------- /src/you_get/extractors/tucao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tucao_download'] 4 | from ..common import * 5 | # import re 6 | import random 7 | import time 8 | from xml.dom import minidom 9 | #possible raw list types 10 | #1.
<li>type=tudou&vid=199687639</li> 11 | #2.<li>type=tudou&vid=199506910|</li> 12 | #3.<li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|</li> 13 | #4 may ?<li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|xx**type=&vid=?</li> 14 | #5.<li>type=tudou&vid=200003098|07**type=tudou&vid=200000350|08</li> 15 | #6.<li>vid=49454694&type=sina|</li> 16 | #7.<li>type=189&vid=513031813243909|</li> 17 | # re_pattern=re.compile(r"(type=(.+?)&(vid|file)=(.*?))[\|<]") 18 | 19 | def tucao_single_download(type_link, title, output_dir=".", merge=True, info_only=False): 20 | if "file" in type_link: 21 | url=type_link[type_link.find("file=")+5:] 22 | vtype, ext, size=url_info(url) 23 | print_info(site_info, title, vtype, size) 24 | if not info_only: 25 | download_urls([url], title, ext, size, output_dir) 26 | #fix for 189 video source, see raw list types 7 27 | elif "189" in type_link: 28 | vid = match1(type_link, r"vid=(\d+)") 29 | assert vid, "vid not exists" 30 | url = "http://api.tucao.tv/api/down/{}".format(vid) 31 | vtype, ext, size=url_info(url) 32 | print_info(site_info, title, vtype, size) 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir) 35 | else: 36 | u="http://www.tucao.tv/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(type_link,random.getrandbits(28),int(time.time()*1000)) 37 | xml=minidom.parseString(get_content(u)) 38 | urls=[] 39 | size=0 40 | for i in xml.getElementsByTagName("url"): 41 | urls.append(i.firstChild.nodeValue) 42 | vtype, ext, _size=url_info(i.firstChild.nodeValue) 43 | size+=_size 44 | print_info(site_info, title, vtype, size) 45 | if not info_only: 46 | download_urls(urls, title, ext, size, output_dir) 47 | 48 | def tucao_download(url, output_dir=".", merge=True, info_only=False, **kwargs): 49 | html=get_content(url) 50 | title=match1(html,r'<h1 class="show_title">(.*?)<\w')
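# Worked example of the raw-list parsing below, using the values from sample #5
# above: raw_list = 'type=tudou&vid=200003098|07**type=tudou&vid=200000350|08'
# splits on '**' into two entries, and each entry then splits on '|' into
# (format_link, sub_title), e.g. ('type=tudou&vid=200003098', '07').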

51 | #fix for raw list that vid goes before type, see raw list types 6 52 | raw_list=match1(html,r"<li>\s*(type=.+?|vid=.+?)</li>") 53 | raw_l=raw_list.split("**") 54 | if len(raw_l)==1: 55 | format_link=raw_l[0][:-1] if raw_l[0].endswith("|") else raw_l[0] 56 | tucao_single_download(format_link,title,output_dir,merge,info_only) 57 | else: 58 | for i in raw_l: 59 | format_link,sub_title=i.split("|") 60 | tucao_single_download(format_link,title+"-"+sub_title,output_dir,merge,info_only) 61 | 62 | 63 | site_info = "tucao.tv" 64 | download = tucao_download 65 | download_playlist = playlist_not_supported("tucao") 66 | -------------------------------------------------------------------------------- /src/you_get/extractors/longzhu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['longzhu_download'] 4 | 5 | import json 6 | from ..common import ( 7 | get_content, 8 | general_m3u8_extractor, 9 | match1, 10 | print_info, 11 | download_urls, 12 | playlist_not_supported, 13 | ) 14 | from ..common import player 15 | 16 | def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): 17 | web_domain = url.split('/')[2] 18 | if (web_domain == 'star.longzhu.com') or (web_domain == 'y.longzhu.com'): 19 | domain = url.split('/')[3].split('?')[0] 20 | m_url = 'http://m.longzhu.com/{0}'.format(domain) 21 | m_html = get_content(m_url) 22 | room_id_patt = r'var\s*roomId\s*=\s*(\d+);' 23 | room_id = match1(m_html,room_id_patt) 24 | 25 | json_url = 'http://liveapi.plu.cn/liveapp/roomstatus?roomId={0}'.format(room_id) 26 | content = get_content(json_url) 27 | data = json.loads(content) 28 | streamUri = data['streamUri'] 29 | if len(streamUri) <= 4: 30 | raise ValueError('The live stream is not online!') 31 | title = data['title'] 32 | streamer = data['userName'] 33 | title = '{}: {}'.format(streamer, title) 34 | 35 | stream_api_url = 'http://livestream.plu.cn/live/getlivePlayurl?roomId={0}'.format(room_id) 36 | content = get_content(stream_api_url) 37 | data = json.loads(content) 38 | isonline = data.get('isTransfer') 39 | if isonline == '0': 40 | raise ValueError('The live stream is not online!') 41 | 42 | real_url = data['playLines'][0]['urls'][0]['securityUrl'] 43 | 44 | print_info(site_info, title, 'flv', float('inf')) 45 | 46 | if not info_only: 47 | download_urls([real_url], title, 'flv', None, output_dir, merge=merge) 48 | 49 | elif web_domain == 'replay.longzhu.com': 50 | videoid = match1(url, r'(\d+)$') 51 | json_url = 'http://liveapi.longzhu.com/livereplay/getreplayfordisplay?videoId={0}'.format(videoid) 52 | content = get_content(json_url) 53 | data = json.loads(content) 54 | 55 | username = data['userName'] 56 | title = data['title'] 57 | title = '{}: {}'.format(username, title) 58 | real_url = data['videoUrl'] 59 | 60 | if player: 61 | print_info('Longzhu Video', title, 'm3u8', 0) 62 | download_urls([real_url], title, 'm3u8', 0, output_dir, merge=merge) 63 | else: 64 | urls = general_m3u8_extractor(real_url) 65 | print_info('Longzhu Video', title, 'm3u8', 0) 66 | if not info_only: 67 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 68 | 69 | else: 70 | raise ValueError('Wrong url or unsupported link ... 
{0}'.format(url)) 71 | 72 | site_info = 'longzhu.com' 73 | download = longzhu_download 74 | download_playlist = playlist_not_supported('longzhu') 75 | -------------------------------------------------------------------------------- /src/you_get/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import getopt 4 | import os 5 | import platform 6 | import sys 7 | from .version import script_name, __version__ 8 | from .util import git, log 9 | 10 | _options = [ 11 | 'help', 12 | 'version', 13 | 'gui', 14 | 'force', 15 | 'playlists', 16 | ] 17 | _short_options = 'hVgfl' 18 | 19 | _help = """Usage: {} [OPTION]... [URL]... 20 | TODO 21 | """.format(script_name) 22 | 23 | # TBD 24 | def main_dev(**kwargs): 25 | """Main entry point. 26 | you-get-dev 27 | """ 28 | 29 | # Get (branch, commit) if running from a git repo. 30 | head = git.get_head(kwargs['repo_path']) 31 | 32 | # Get options and arguments. 33 | try: 34 | opts, args = getopt.getopt(sys.argv[1:], _short_options, _options) 35 | except getopt.GetoptError as e: 36 | log.wtf(""" 37 | [Fatal] {}. 38 | Try '{} --help' for more options.""".format(e, script_name)) 39 | 40 | if not opts and not args: 41 | # Display help. 42 | print(_help) 43 | # Enter GUI mode. 44 | #from .gui import gui_main 45 | #gui_main() 46 | else: 47 | conf = {} 48 | for opt, arg in opts: 49 | if opt in ('-h', '--help'): 50 | # Display help. 51 | print(_help) 52 | 53 | elif opt in ('-V', '--version'): 54 | # Display version. 55 | log.println("you-get:", log.BOLD) 56 | log.println(" version: {}".format(__version__)) 57 | if head is not None: 58 | log.println(" branch: {}\n commit: {}".format(*head)) 59 | else: 60 | log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__))) 61 | 62 | log.println(" platform: {}".format(platform.platform())) 63 | log.println(" python: {}".format(sys.version.split('\n')[0])) 64 | 65 | elif opt in ('-g', '--gui'): 66 | # Run using GUI. 67 | conf['gui'] = True 68 | 69 | elif opt in ('-f', '--force'): 70 | # Force download. 71 | conf['force'] = True 72 | 73 | elif opt in ('-l', '--playlist', '--playlists'): 74 | # Download playlist whenever possible. 75 | conf['playlist'] = True 76 | 77 | if args: 78 | if 'gui' in conf and conf['gui']: 79 | # Enter GUI mode. 80 | from .gui import gui_main 81 | gui_main(*args, **conf) 82 | else: 83 | # Enter console mode. 84 | from .console import console_main 85 | console_main(*args, **conf) 86 | 87 | def main(**kwargs): 88 | """Main entry point. 
/src/you_get/extractors/cntv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import json
4 | import re
5 | 
6 | from ..common import get_content, r1, match1, playlist_not_supported
7 | from ..extractor import VideoExtractor
8 | 
9 | __all__ = ['cntv_download', 'cntv_download_by_id']
10 | 
11 | 
12 | class CNTV(VideoExtractor):
13 |     name = 'CNTV.com'
14 |     stream_types = [
15 |         {'id': '1', 'video_profile': '1280x720_2000kb/s', 'map_to': 'chapters4'},
16 |         {'id': '2', 'video_profile': '1280x720_1200kb/s', 'map_to': 'chapters3'},
17 |         {'id': '3', 'video_profile': '640x360_850kb/s', 'map_to': 'chapters2'},
18 |         {'id': '4', 'video_profile': '480x270_450kb/s', 'map_to': 'chapters'},
19 |         {'id': '5', 'video_profile': '320x180_200kb/s', 'map_to': 'lowChapters'},
20 |     ]
21 | 
22 |     ep = 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={}'
23 | 
24 |     def __init__(self):
25 |         super().__init__()
26 |         self.api_data = None
27 | 
28 |     def prepare(self, **kwargs):
29 |         self.api_data = json.loads(get_content(self.__class__.ep.format(self.vid)))
30 |         self.title = self.api_data['title']
31 |         for s in self.api_data['video']:
32 |             for st in self.__class__.stream_types:
33 |                 if st['map_to'] == s:
34 |                     urls = self.api_data['video'][s]
35 |                     src = [u['url'] for u in urls]
36 |                     stream_data = dict(src=src, size=0, container='mp4', video_profile=st['video_profile'])
37 |                     self.streams[st['id']] = stream_data
38 | 
39 | 
40 | def cntv_download_by_id(rid, **kwargs):
41 |     CNTV().download_by_vid(rid, **kwargs)
42 | 
43 | 
44 | def cntv_download(url, **kwargs):
45 |     if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
46 |         rid = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
47 |     elif re.match(r'http(s)?://tv\.cctv\.com/\d+/\d+/\d+/\w+\.shtml', url):
48 |         rid = r1(r'var guid = "(\w+)"', get_content(url))
49 |     elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
50 |             re.match(r'http://\w+\.cntv\.cn/(\w+/)*VIDE\d+\.shtml', url) or \
51 |             re.match(r'http://(\w+)\.cntv\.cn/(\w+)/classpage/video/(\d+)/(\d+)\.shtml', url) or \
52 |             re.match(r'http(s)?://\w+\.cctv\.com/\d+/\d+/\d+/\w+\.shtml', url) or \
53 |             re.match(r'http://\w+\.cntv\.cn/\d+/\d+/\d+/\w+\.shtml', url):
54 |         page = get_content(url)
55 |         rid = r1(r'videoCenterId","(\w+)"', page)
56 |         if rid is None:
57 |             guid = re.search(r'guid\s*=\s*"([0-9a-z]+)"', page).group(1)
58 |             rid = guid
59 |     elif re.match(r'http://xiyou\.cntv\.cn/v-[\w-]+\.html', url):
60 |         rid = r1(r'http://xiyou\.cntv\.cn/v-([\w-]+)\.html', url)
61 |     else:
62 |         raise NotImplementedError(url)
63 | 
64 |     CNTV().download_by_vid(rid, **kwargs)
65 | 
66 | site_info = "CNTV.com"
67 | download = cntv_download
68 | download_playlist = playlist_not_supported('cntv')
69 | 
--------------------------------------------------------------------------------
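A minimal usage sketch for the vid-based entry point above (not repo code; the pid value is a placeholder):

    from you_get.extractors.cntv import cntv_download_by_id

    # Print the available streams for a hypothetical pid without downloading.
    cntv_download_by_id('HYPOTHETICAL_PID', info_only=True)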
/src/you_get/extractors/toutiao.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import binascii
3 | import random
4 | from json import loads
5 | from urllib.parse import urlparse
6 | 
7 | from ..common import *
8 | 
9 | try:
10 |     from base64 import decodebytes
11 | except ImportError:
12 |     from base64 import decodestring
13 | 
14 |     decodebytes = decodestring
15 | 
16 | __all__ = ['toutiao_download', ]
17 | 
18 | 
19 | def random_with_n_digits(n):
20 |     return random.randint(10 ** (n - 1), (10 ** n) - 1)
21 | 
22 | 
23 | def sign_video_url(vid):
24 |     r = str(random_with_n_digits(16))
25 | 
26 |     url = 'https://ib.365yg.com/video/urls/v/1/toutiao/mp4/{vid}'.format(vid=vid)
27 |     n = urlparse(url).path + '?r=' + r
28 |     b_n = bytes(n, encoding="utf-8")
29 |     s = binascii.crc32(b_n)
30 |     aid = 1364
31 |     ts = int(time.time() * 1000)
32 |     return url + '?r={r}&s={s}&aid={aid}&vfrom=xgplayer&callback=axiosJsonpCallback1&_={ts}'.format(r=r, s=s, aid=aid,
33 |                                                                                                     ts=ts)
34 | 
35 | 
36 | class ToutiaoVideoInfo(object):
37 | 
38 |     def __init__(self):
39 |         self.bitrate = None
40 |         self.definition = None
41 |         self.size = None
42 |         self.height = None
43 |         self.width = None
44 |         self.type = None
45 |         self.url = None
46 | 
47 |     def __str__(self):
48 |         return json.dumps(self.__dict__)
49 | 
50 | 
51 | def get_file_by_vid(video_id):
52 |     vRet = []
53 |     url = sign_video_url(video_id)
54 |     ret = get_content(url)
55 |     ret = loads(ret[20:-1])  # strip the JSONP callback wrapper
56 |     vlist = ret.get('data').get('video_list')
57 |     if len(vlist) > 0:
58 |         vInfo = vlist.get(sorted(vlist.keys(), reverse=True)[0])
59 |         vUrl = vInfo.get('main_url')
60 |         vUrl = decodebytes(vUrl.encode('ascii')).decode('ascii')
61 |         videoInfo = ToutiaoVideoInfo()
62 |         videoInfo.bitrate = vInfo.get('bitrate')
63 |         videoInfo.definition = vInfo.get('definition')
64 |         videoInfo.size = vInfo.get('size')
65 |         videoInfo.height = vInfo.get('vheight')
66 |         videoInfo.width = vInfo.get('vwidth')
67 |         videoInfo.type = vInfo.get('vtype')
68 |         videoInfo.url = vUrl
69 |         vRet.append(videoInfo)
70 |     return vRet
71 | 
72 | 
73 | def toutiao_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
74 |     html = get_html(url, faker=True)
75 |     video_id = match1(html, r".*?videoId: '(?P<vid>.*)'")
76 |     title = match1(html, '.*?<title>(?P<title>.*?)</title>')
77 |     video_file_list = get_file_by_vid(video_id)  # call the API to get the source video files
78 |     type, ext, size = url_info(video_file_list[0].url, faker=True)
79 |     print_info(site_info=site_info, title=title, type=type, size=size)
80 |     if not info_only:
81 |         download_urls([video_file_list[0].url], title, ext, size, output_dir, merge=merge, faker=True)
82 | 
83 | 
84 | site_info = "Toutiao.com"
85 | download = toutiao_download
86 | download_playlist = playlist_not_supported("toutiao")
87 | 
--------------------------------------------------------------------------------
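A standalone illustration of the signing scheme in sign_video_url above: the CRC32 of "<path>?r=<nonce>" becomes the `s` query parameter. The vid and nonce are hypothetical.

    import binascii
    from urllib.parse import urlparse

    url = 'https://ib.365yg.com/video/urls/v/1/toutiao/mp4/abcdef'
    r = '1234567890123456'
    s = binascii.crc32(bytes(urlparse(url).path + '?r=' + r, encoding='utf-8'))
    print('?r={}&s={}'.format(r, s))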
/src/you_get/extractors/sohu.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | __all__ = ['sohu_download']
4 | 
5 | from ..common import *
6 | 
7 | import json
8 | 
9 | '''
10 | Changelog:
11 |     1. http://tv.sohu.com/upload/swf/20150604/Main.swf
12 |        new api
13 | '''
14 | 
15 | 
16 | def real_url(fileName, key, ch):
17 |     url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n"
18 |     return json.loads(get_html(url))['servers'][0]['url']
19 | 
20 | 
21 | def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
22 |     if re.match(r'http://share\.vrs\.sohu\.com', url):
23 |         vid = r1(r'id=(\d+)', url)
24 |     else:
25 |         html = get_html(url)
26 |         vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
27 |     assert vid
28 | 
29 |     if extractor_proxy:
30 |         set_proxy(tuple(extractor_proxy.split(":")))
31 |     info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
32 |     if info and info.get("data", ""):
33 |         for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]:
34 |             if 'data' in info:
35 |                 hqvid = info['data'][qtyp]
36 |             else:
37 |                 hqvid = info[qtyp]
38 |             if hqvid != 0 and hqvid != vid:
39 |                 info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
40 |                 if 'allot' not in info:
41 |                     continue
42 |                 break
43 |         if extractor_proxy:
44 |             unset_proxy()
45 |         host = info['allot']
46 |         prot = info['prot']
47 |         tvid = info['tvid']
48 |         urls = []
49 |         data = info['data']
50 |         title = data['tvName']
51 |         size = sum(data['clipsBytes'])
52 |         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
53 |         for fileName, key in zip(data['su'], data['ck']):
54 |             urls.append(real_url(fileName, key, data['ch']))
55 |         # assert data['clipsURL'][0].endswith('.mp4')
56 | 
57 |     else:
58 |         info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
59 |         host = info['allot']
60 |         prot = info['prot']
61 |         tvid = info['tvid']
62 |         urls = []
63 |         data = info['data']
64 |         title = data['tvName']
65 |         size = sum(map(int, data['clipsBytes']))
66 |         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
67 |         for fileName, key in zip(data['su'], data['ck']):
68 |             urls.append(real_url(fileName, key, data['ch']))
69 | 
70 |     print_info(site_info, title, 'mp4', size)
71 |     if not info_only:
72 |         download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge)
73 | 
74 | 
75 | site_info = "Sohu.com"
76 | download = sohu_download
77 | download_playlist = playlist_not_supported('sohu')
78 | 
--------------------------------------------------------------------------------
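A minimal usage sketch for the extractor above (not repo code; the page URL is hypothetical):

    from you_get.extractors.sohu import sohu_download

    # info_only=True prints the title and total size without downloading.
    sohu_download('http://tv.sohu.com/v/hypothetical.html', info_only=True)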
/src/you_get/extractors/douyutv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | __all__ = ['douyutv_download']
4 | 
5 | from ..common import *
6 | from ..util.log import *
7 | import json
8 | import hashlib
9 | import time
10 | import re
11 | 
12 | headers = {
13 |     'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4'
14 | }
15 | 
16 | def douyutv_video_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
17 |     ep = 'http://vmobile.douyu.com/video/getInfo?vid='
18 |     patt = r'show/([0-9A-Za-z]+)'
19 |     title_patt = r'<h1>(.+?)</h1>'
20 | 
21 |     hit = re.search(patt, url)
22 |     if hit is None:
23 |         log.wtf('Unknown url pattern')
24 |     vid = hit.group(1)
25 | 
26 |     page = get_content(url, headers=headers)
27 |     hit = re.search(title_patt, page)
28 |     if hit is None:
29 |         title = vid
30 |     else:
31 |         title = hit.group(1)
32 | 
33 |     meta = json.loads(get_content(ep + vid))
34 |     if meta['error'] != 0:
35 |         log.wtf('Error from API server')
36 |     m3u8_url = meta['data']['video_url']
37 |     print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url)
38 |     if not info_only:
39 |         urls = general_m3u8_extractor(m3u8_url)
40 |         download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs)
41 | 
42 | 
43 | def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
44 |     if 'v.douyu.com/show/' in url:
45 |         douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
46 |         return
47 | 
48 |     url = re.sub(r'.*douyu\.com', 'https://m.douyu.com/room', url)
49 |     html = get_content(url, headers)
50 |     room_id_patt = r'"rid"\s*:\s*(\d+),'
51 |     room_id = match1(html, room_id_patt)
52 |     if room_id == "0":
53 |         room_id = url[url.rfind('/') + 1:]
54 | 
55 |     api_url = "http://www.douyutv.com/api/v1/"
56 |     args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time()))
57 |     auth_md5 = (args + "zNzMV1y4EMxOHS6I5WKm").encode("utf-8")
58 |     auth_str = hashlib.md5(auth_md5).hexdigest()
59 |     json_request_url = "%s%s&auth=%s" % (api_url, args, auth_str)
60 | 
61 |     content = get_content(json_request_url, headers)
62 |     json_content = json.loads(content)
63 |     server_status = json_content.get('error', 0)  # check for an API error before touching 'data'
64 |     if server_status != 0:
65 |         raise ValueError("Server returned error: %s" % server_status)
66 |     data = json_content['data']
67 | 
68 |     title = data.get('room_name')
69 |     show_status = data.get('show_status')
70 |     if show_status != "1":
71 |         raise ValueError("The live stream is not online! (show_status: %s)" % show_status)
72 | 
73 |     real_url = data.get('rtmp_url') + '/' + data.get('rtmp_live')
74 | 
75 |     print_info(site_info, title, 'flv', float('inf'))
76 |     if not info_only:
77 |         download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge)
78 | 
79 | 
80 | site_info = "douyu.com"
81 | download = douyutv_download
82 | download_playlist = playlist_not_supported('douyu')
83 | 
--------------------------------------------------------------------------------
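A standalone illustration of the room-API signature used above: MD5 over the query string plus a fixed salt (the salt is taken from the code above; the room id is hypothetical):

    import hashlib
    import time

    room_id = '123456'
    args = 'room/%s?aid=wp&client_sys=wp&time=%d' % (room_id, int(time.time()))
    auth = hashlib.md5((args + 'zNzMV1y4EMxOHS6I5WKm').encode('utf-8')).hexdigest()
    print('http://www.douyutv.com/api/v1/' + args + '&auth=' + auth)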
/src/you_get/extractors/ku6.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | __all__ = ['ku6_download', 'ku6_download_by_id']
4 | 
5 | from ..common import *
6 | 
7 | import json
8 | import re
9 | 
10 | def ku6_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
11 |     data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data']
12 |     t = data['t']
13 |     f = data['f']
14 |     title = title or t
15 |     assert title
16 |     urls = f.split(',')
17 |     ext = match1(urls[0], r'.*\.(\w+)\??[^\.]*')
18 |     assert ext in ('flv', 'mp4', 'f4v'), ext
19 |     ext = {'f4v': 'flv'}.get(ext, ext)
20 |     size = 0
21 |     for url in urls:
22 |         _, _, temp = url_info(url)
23 |         size += temp
24 | 
25 |     print_info(site_info, title, ext, size)
26 |     if not info_only:
27 |         download_urls(urls, title, ext, size, output_dir, merge=merge)
28 | 
29 | def ku6_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
30 |     id = None
31 | 
32 |     if match1(url, r'http://baidu\.ku6\.com/watch/(.*)\.html') is not None:
33 |         id = baidu_ku6(url)
34 |     else:
35 |         patterns = [r'http://v\.ku6\.com/special/show_\d+/(.*)\.\.\.html',
36 |                     r'http://v\.ku6\.com/show/(.*)\.\.\.html',
37 |                     r'http://my\.ku6\.com/watch\?.*v=(.*)\.\..*']
38 |         id = r1_of(patterns, url)
39 | 
40 |     if id is None:
41 |         # http://www.ku6.com/2017/detail-zt.html?vid=xvqTmvZrH8MNvErpvRxFn3
42 |         page = get_content(url)
43 |         meta = re.search(r'detailDataMap=(\{.+?\});', page)
44 |         if meta is not None:
45 |             meta = meta.group(1)
46 |         else:
47 |             raise Exception('Unsupported url')
48 |         vid = re.search(r'vid=([^&]+)', url)
49 |         if vid is not None:
50 |             vid = vid.group(1)
51 |         else:
52 |             raise Exception('Unsupported url')
53 |         this_meta = re.search('"?' + vid + r'"?:\{(.+?)\}', meta)
54 |         if this_meta is not None:
55 |             this_meta = this_meta.group(1)
56 |             title = re.search('title:"(.+?)"', this_meta).group(1)
57 |             video_url = re.search('playUrl:"(.+?)"', this_meta).group(1)
58 |             video_size = url_size(video_url)
59 |             print_info(site_info, title, 'mp4', video_size)
60 |             if not info_only:
61 |                 download_urls([video_url], title, 'mp4', video_size, output_dir, merge=merge, **kwargs)
62 |             return
63 | 
64 |     ku6_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only)
65 | 
66 | def baidu_ku6(url):
67 |     id = None
68 | 
69 |     h1 = get_html(url)
70 |     isrc = match1(h1, r'