├── tests ├── __init__.py ├── test_util.py ├── test_common.py └── test.py ├── src └── you_get │ ├── util │ ├── __init__.py │ ├── term.py │ ├── strings.py │ ├── fs.py │ ├── git.py │ └── log.py │ ├── cli_wrapper │ ├── __init__.py │ ├── player │ │ ├── wmp.py │ │ ├── mplayer.py │ │ ├── dragonplayer.py │ │ ├── gnome_mplayer.py │ │ ├── vlc.py │ │ ├── __init__.py │ │ └── __main__.py │ ├── openssl │ │ └── __init__.py │ ├── transcoder │ │ ├── libav.py │ │ ├── __init__.py │ │ ├── ffmpeg.py │ │ └── mencoder.py │ └── downloader │ │ └── __init__.py │ ├── version.py │ ├── processor │ ├── __init__.py │ ├── join_ts.py │ └── rtmpdump.py │ ├── __init__.py │ ├── extractors │ ├── khan.py │ ├── alive.py │ ├── archive.py │ ├── cbs.py │ ├── freesound.py │ ├── bandcamp.py │ ├── magisto.py │ ├── quanmin.py │ ├── heavymusic.py │ ├── ted.py │ ├── giphy.py │ ├── metacafe.py │ ├── mixcloud.py │ ├── iqilu.py │ ├── douyin.py │ ├── theplatform.py │ ├── facebook.py │ ├── huomaotv.py │ ├── musicplayon.py │ ├── interest.py │ ├── ehow.py │ ├── vine.py │ ├── vidto.py │ ├── baomihua.py │ ├── dailymotion.py │ ├── yizhibo.py │ ├── naver.py │ ├── kuaishou.py │ ├── suntv.py │ ├── iwara.py │ ├── kuwo.py │ ├── veoh.py │ ├── w56.py │ ├── joy.py │ ├── panda.py │ ├── videomega.py │ ├── qingting.py │ ├── soundcloud.py │ ├── mtv81.py │ ├── qq_egame.py │ ├── pinterest.py │ ├── ifeng.py │ ├── nicovideo.py │ ├── yinyuetai.py │ ├── fantasy.py │ ├── infoq.py │ ├── nanagogo.py │ ├── miomio.py │ ├── kugou.py │ ├── miaopai.py │ ├── __init__.py │ ├── zhanqi.py │ ├── instagram.py │ ├── douban.py │ ├── lizhi.py │ ├── huaban.py │ ├── toutiao.py │ ├── fc2video.py │ ├── vk.py │ ├── bigthink.py │ ├── longzhu.py │ ├── tucao.py │ ├── cntv.py │ ├── imgur.py │ ├── douyutv.py │ ├── ku6.py │ ├── qie_video.py │ ├── sohu.py │ ├── bokecc.py │ ├── showroom.py │ ├── pixnet.py │ ├── qie.py │ ├── yixia.py │ ├── ckplayer.py │ ├── ixigua.py │ ├── twitter.py │ ├── coub.py │ ├── ximalaya.py │ ├── dilidili.py │ ├── tudou.py │ └── universal.py │ 
├── json_output.py │ └── __main__.py ├── MANIFEST.in ├── setup.cfg ├── you-get.plugin.zsh ├── you-get ├── .travis.yml ├── Makefile ├── contrib └── completion │ ├── you-get-completion.bash │ ├── _you-get │ └── you-get.fish ├── .gitignore ├── LICENSE.txt ├── setup.py ├── you-get.json ├── CONTRIBUTING.md ├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/wmp.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/openssl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/libav.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/dragonplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/gnome_mplayer.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/ffmpeg.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/mencoder.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/vlc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .mplayer import * 4 | -------------------------------------------------------------------------------- /src/you_get/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | script_name = 'you-get' 4 | __version__ = '0.4.1040' 5 | -------------------------------------------------------------------------------- /MANIFEST.in: 
-------------------------------------------------------------------------------- 1 | include *.rst 2 | include *.txt 3 | include Makefile 4 | include README.md 5 | include you-get 6 | include you-get.json 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /you-get.plugin.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | alias you-get="noglob python3 $(dirname $0)/you-get" 3 | alias you-vlc="noglob python3 $(dirname $0)/you-get --player vlc" 4 | -------------------------------------------------------------------------------- /src/you_get/processor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .join_flv import concat_flv 4 | from .join_mp4 import concat_mp4 5 | from .ffmpeg import * 6 | from .rtmpdump import * 7 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' WIP 4 | def main(): 5 | script_main('you-get', any_download, any_download_playlist) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | ''' 10 | -------------------------------------------------------------------------------- /src/you_get/util/term.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def get_terminal_size(): 4 | """Get (width, height) of the current terminal.""" 5 | try: 6 | import fcntl, termios, struct # fcntl module only available on Unix 7 | 
return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) 8 | except: 9 | return (40, 80) 10 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.util.fs import * 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_legitimize(self): 9 | self.assertEqual(legitimize("1*2", os="Linux"), "1*2") 10 | self.assertEqual(legitimize("1*2", os="Darwin"), "1*2") 11 | self.assertEqual(legitimize("1*2", os="Windows"), "1-2") 12 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.common import * 6 | 7 | class TestCommon(unittest.TestCase): 8 | 9 | def test_match1(self): 10 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') 11 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) 12 | -------------------------------------------------------------------------------- /src/you_get/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 3 | 4 | import sys 5 | 6 | if sys.version_info[0] == 3: 7 | #from .extractor import Extractor, VideoExtractor 8 | #from .util import log 9 | 10 | from .__main__ import * 11 | 12 | #from .common import * 13 | #from .version import * 14 | #from .cli_wrapper import * 15 | #from .extractor import * 16 | else: 17 | # Don't import anything. 
18 | pass 19 | -------------------------------------------------------------------------------- /you-get: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys 3 | 4 | _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) 5 | _filepath = os.path.dirname(sys.argv[0]) 6 | sys.path.insert(1, os.path.join(_filepath, _srcdir)) 7 | 8 | if sys.version_info[0] == 3: 9 | import you_get 10 | if __name__ == '__main__': 11 | you_get.main(repo_path=_filepath) 12 | else: # Python 2 13 | from you_get.util import log 14 | log.e("[fatal] Python 3 is required!") 15 | log.wtf("try to run this script using 'python3 you-get'.") 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # https://travis-ci.org/soimort/you-get 2 | language: python 3 | python: 4 | - "3.2" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | - "nightly" 10 | - "pypy3" 11 | script: make test 12 | sudo: false 13 | notifications: 14 | webhooks: 15 | urls: 16 | - https://webhooks.gitter.im/e/43cd57826e88ed8f2152 17 | on_success: change # options: [always|never|change] default: always 18 | on_failure: always # options: [always|never|change] default: always 19 | on_start: never # options: [always|never|change] default: always 20 | -------------------------------------------------------------------------------- /src/you_get/extractors/khan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['khan_download'] 4 | 5 | from ..common import * 6 | from .youtube import YouTube 7 | 8 | def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_content(url) 10 | youtube_url = re.search(' unicode''' 16 | s = m.group(0)[2:].rstrip(';;') 17 | if s.startswith('x'): 18 | return chr(int('0'+s, 16)) 19 | else: 
20 | return chr(int(s)) 21 | 22 | from .fs import legitimize 23 | 24 | def get_filename(htmlstring): 25 | return legitimize(unescape_html(htmlstring)) 26 | 27 | def parameterize(string): 28 | return "'%s'" % string.replace("'", r"'\''") 29 | -------------------------------------------------------------------------------- /src/you_get/extractors/magisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['magisto_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) 12 | api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) 13 | content = get_html(api_url) 14 | data = json.loads(content) 15 | title1 = data['title'] 16 | title2 = data['creator'] 17 | title = "%s - %s" % (title1, title2) 18 | url = data['video_direct_url'] 19 | type, ext, size = url_info(url) 20 | 21 | print_info(site_info, title, type, size) 22 | if not info_only: 23 | download_urls([url], title, ext, size, output_dir, merge=merge) 24 | 25 | site_info = "Magisto.com" 26 | download = magisto_download 27 | download_playlist = playlist_not_supported('magisto') 28 | -------------------------------------------------------------------------------- /src/you_get/extractors/quanmin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['quanmin_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def quanmin_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | roomid = url.split('/')[3].split('?')[0] 10 | 11 | json_request_url = 'http://m.quanmin.tv/json/rooms/{}/noinfo6.json'.format(roomid) 12 | content = get_html(json_request_url) 13 | data = json.loads(content) 14 | 15 | title = data["title"] 16 | 17 | if not data["play_status"]: 
18 | raise ValueError("The live stream is not online!") 19 | 20 | real_url = data["live"]["ws"]["flv"]["5"]["src"] 21 | 22 | print_info(site_info, title, 'flv', float('inf')) 23 | if not info_only: 24 | download_urls([real_url], title, 'flv', None, output_dir, merge = merge) 25 | 26 | site_info = "quanmin.tv" 27 | download = quanmin_download 28 | download_playlist = playlist_not_supported('quanmin') 29 | -------------------------------------------------------------------------------- /src/you_get/extractors/heavymusic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['heavymusic_download'] 4 | 5 | from ..common import * 6 | 7 | def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url) 9 | tracks = re.findall(r'href="(online2\.php[^"]+)"', html) 10 | for track in tracks: 11 | band = r1(r'band=([^&]*)', track) 12 | album = r1(r'album=([^&]*)', track) 13 | title = r1(r'track=([^&]*)', track) 14 | file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) 15 | _, _, size = url_info(file_url) 16 | 17 | print_info(site_info, title, 'mp3', size) 18 | if not info_only: 19 | download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) 20 | 21 | site_info = "heavy-music.ru" 22 | download = heavymusic_download 23 | download_playlist = heavymusic_download 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/ted.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ted_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' 11 | metadata = json.loads('{' + 
match1(html, patt) + '}') 12 | title = metadata['talks'][0]['title'] 13 | nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] 14 | for quality in ['high', 'medium', 'low']: 15 | if quality in nativeDownloads: 16 | url = nativeDownloads[quality] 17 | type, ext, size = url_info(url) 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge=merge) 21 | break 22 | 23 | site_info = "TED.com" 24 | download = ted_download 25 | download_playlist = playlist_not_supported('ted') 26 | -------------------------------------------------------------------------------- /src/you_get/extractors/giphy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['giphy_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | url = list(set([ 12 | unicodize(str.replace(i, '\\/', '/')) 13 | for i in re.findall(r'', html) 14 | ])) 15 | 16 | title = r1(r'', html) 17 | 18 | if title is None: 19 | title = url[0] 20 | 21 | type, ext, size = url_info(url[0], True) 22 | size = urls_size(url) 23 | 24 | type = "video/mp4" 25 | ext = "mp4" 26 | 27 | print_info(site_info, title, type, size) 28 | if not info_only: 29 | download_urls(url, title, ext, size, output_dir, merge=False) 30 | 31 | site_info = "Giphy.com" 32 | download = giphy_download 33 | download_playlist = playlist_not_supported('giphy') 34 | -------------------------------------------------------------------------------- /src/you_get/extractors/metacafe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['metacafe_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from urllib.parse import unquote 8 | 9 | def metacafe_download(url, output_dir = '.', merge = 
True, info_only = False, **kwargs): 10 | if re.match(r'http://www.metacafe.com/watch/\w+', url): 11 | html =get_content(url) 12 | title = r1(r'>> import you_get" % you_get.version.__version__)') 9 | 10 | test: 11 | $(SETUP) test 12 | 13 | clean: 14 | zenity --question 15 | rm -fr build/ dist/ src/*.egg-info/ 16 | find . | grep __pycache__ | xargs rm -fr 17 | find . | grep .pyc | xargs rm -f 18 | 19 | all: build sdist bdist bdist_egg bdist_wheel 20 | 21 | html: 22 | pandoc README.md > README.html 23 | 24 | rst: 25 | pandoc -s -t rst README.md > README.rst 26 | 27 | build: 28 | $(SETUP) build 29 | 30 | sdist: 31 | $(SETUP) sdist 32 | 33 | bdist: 34 | $(SETUP) bdist 35 | 36 | bdist_egg: 37 | $(SETUP) bdist_egg 38 | 39 | bdist_wheel: 40 | $(SETUP) bdist_wheel 41 | 42 | install: 43 | $(SETUP) install --user --prefix= 44 | 45 | release: 46 | zenity --question 47 | $(SETUP) sdist bdist_wheel upload --sign 48 | -------------------------------------------------------------------------------- /src/you_get/extractors/mixcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mixcloud_download'] 4 | 5 | from ..common import * 6 | 7 | def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url, faker=True) 9 | title = r1(r'(.+)', html) 12 | 13 | if title is None: 14 | title = url 15 | 16 | sd_urls = list(set([ 17 | unicodize(str.replace(i, '\\/', '/')) 18 | for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html) 19 | ])) 20 | hd_urls = list(set([ 21 | unicodize(str.replace(i, '\\/', '/')) 22 | for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html) 23 | ])) 24 | urls = hd_urls if hd_urls else sd_urls 25 | 26 | type, ext, size = url_info(urls[0], True) 27 | size = urls_size(urls) 28 | 29 | print_info(site_info, title, type, size) 30 | if not info_only: 31 | download_urls(urls, title, ext, size, output_dir, merge=False) 32 | 33 | site_info = 
"Facebook.com" 34 | download = facebook_download 35 | download_playlist = playlist_not_supported('facebook') 36 | -------------------------------------------------------------------------------- /src/you_get/extractors/huomaotv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['huomaotv_download'] 4 | 5 | from ..common import * 6 | 7 | 8 | def get_mobile_room_url(room_id): 9 | return 'http://www.huomao.com/mobile/mob_live/%s' % room_id 10 | 11 | 12 | def get_m3u8_url(stream_id): 13 | return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id 14 | 15 | 16 | def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 17 | room_id_pattern = r'huomao.com/(\d+)' 18 | room_id = match1(url, room_id_pattern) 19 | html = get_content(get_mobile_room_url(room_id)) 20 | 21 | stream_id_pattern = r'id="html_stream" value="(\w+)"' 22 | stream_id = match1(html, stream_id_pattern) 23 | 24 | m3u8_url = get_m3u8_url(stream_id) 25 | 26 | title = match1(html, r'([^<]{1,9999})') 27 | 28 | print_info(site_info, title, 'm3u8', float('inf')) 29 | 30 | if not info_only: 31 | download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) 32 | 33 | 34 | site_info = 'huomao.com' 35 | download = huomaotv_download 36 | download_playlist = playlist_not_supported('huomao') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/musicplayon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import json 7 | 8 | class MusicPlayOn(VideoExtractor): 9 | name = "MusicPlayOn" 10 | 11 | stream_types = [ 12 | {'id': '720p HD'}, 13 | {'id': '360p SD'}, 14 | ] 15 | 16 | def prepare(self, **kwargs): 17 | content = get_content(self.url) 18 | 19 | self.title = match1(content, 20 | 
r'setup\[\'title\'\] = "([^"]+)";') 21 | 22 | for s in self.stream_types: 23 | quality = s['id'] 24 | src = match1(content, 25 | r'src: "([^"]+)", "data-res": "%s"' % quality) 26 | if src is not None: 27 | url = 'http://en.musicplayon.com%s' % src 28 | self.streams[quality] = {'url': url} 29 | 30 | def extract(self, **kwargs): 31 | for i in self.streams: 32 | s = self.streams[i] 33 | _, s['container'], s['size'] = url_info(s['url']) 34 | s['src'] = [s['url']] 35 | 36 | site = MusicPlayOn() 37 | download = site.download_by_url 38 | # TBD: implement download_playlist 39 | -------------------------------------------------------------------------------- /src/you_get/util/fs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import platform 4 | 5 | def legitimize(text, os=platform.system()): 6 | """Converts a string to a valid filename. 7 | """ 8 | 9 | # POSIX systems 10 | text = text.translate({ 11 | 0: None, 12 | ord('/'): '-', 13 | ord('|'): '-', 14 | }) 15 | 16 | if os == 'Windows': 17 | # Windows (non-POSIX namespace) 18 | text = text.translate({ 19 | # Reserved in Windows VFAT and NTFS 20 | ord(':'): '-', 21 | ord('*'): '-', 22 | ord('?'): '-', 23 | ord('\\'): '-', 24 | ord('\"'): '\'', 25 | # Reserved in Windows VFAT 26 | ord('+'): '-', 27 | ord('<'): '-', 28 | ord('>'): '-', 29 | ord('['): '(', 30 | ord(']'): ')', 31 | }) 32 | else: 33 | # *nix 34 | if os == 'Darwin': 35 | # Mac OS HFS+ 36 | text = text.translate({ 37 | ord(':'): '-', 38 | }) 39 | 40 | # Remove leading . 
41 | if text.startswith("."): 42 | text = text[1:] 43 | 44 | text = text[:80] # Trim to 82 Unicode characters long 45 | return text 46 | -------------------------------------------------------------------------------- /src/you_get/extractors/interest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from json import loads 5 | 6 | def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 7 | #http://ch.interest.me/zhtv/VOD/View/114789 8 | #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 9 | html = get_content(url) 10 | #get title 11 | title = match1(html, r'', html) 13 | vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) 14 | assert vid 15 | 16 | xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) 17 | 18 | from xml.dom.minidom import parseString 19 | doc = parseString(xml) 20 | tab = doc.getElementsByTagName('related')[0].firstChild 21 | 22 | for video in tab.childNodes: 23 | if re.search(contentid, video.attributes['link'].value): 24 | url = video.attributes['flv'].value 25 | break 26 | 27 | title = video.attributes['title'].value 28 | assert title 29 | 30 | type, ext, size = url_info(url) 31 | print_info(site_info, title, type, size) 32 | 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir, merge = merge) 35 | 36 | site_info = "ehow.com" 37 | download = ehow_download 38 | download_playlist = playlist_not_supported('ehow') 39 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ============================================== 2 | This is a copy of the MIT license. 
3 | ============================================== 4 | Copyright (C) 2012-2017 Mort Yao 5 | Copyright (C) 2012 Boyu Guo 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | this software and associated documentation files (the "Software"), to deal in 9 | the Software without restriction, including without limitation the rights to 10 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | of the Software, and to permit persons to whom the Software is furnished to do 12 | so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /src/you_get/extractors/vine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['vine_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | 9 | def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_content(url) 11 | 12 | video_id = r1(r'vine.co/v/([^/]+)', url) 13 | title = r1(r'([^<]*)', html) 14 | stream = r1(r'', html) 15 | if not stream: # https://vine.co/v/.../card 16 | stream = r1(r'"videoUrl":"([^"]+)"', html) 17 | if stream: 18 | stream = stream.replace('\\/', '/') 19 | else: 20 | posts_url = 'https://archive.vine.co/posts/' + video_id + '.json' 21 | json_data = json.loads(get_content(posts_url)) 22 | stream = json_data['videoDashUrl'] 23 | title = json_data['description'] 24 | if title == "": 25 | title = json_data['username'].replace(" ", "_") + "_" + video_id 26 | 27 | mime, ext, size = url_info(stream) 28 | 29 | print_info(site_info, title, mime, size) 30 | if not info_only: 31 | download_urls([stream], title, ext, size, output_dir, merge=merge) 32 | 33 | 34 | site_info = "Vine.co" 35 | download = vine_download 36 | download_playlist = playlist_not_supported('vine') 37 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.extractors import ( 6 | imgur, 7 | magisto, 8 | youtube, 9 | bilibili, 10 | ) 11 | 12 | 13 | class YouGetTests(unittest.TestCase): 14 | def test_imgur(self): 15 | imgur.download('http://imgur.com/WVLk5nD', info_only=True) 16 | imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True) 17 | 18 | def test_magisto(self): 19 | magisto.download( 20 | 'http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA', 21 | info_only=True 
22 | ) 23 | 24 | def test_youtube(self): 25 | youtube.download( 26 | 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True 27 | ) 28 | youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) 29 | youtube.download( 30 | 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa 31 | info_only=True 32 | ) 33 | 34 | def test_bilibili(self): 35 | bilibili.download( 36 | 'https://www.bilibili.com/video/av16907446/', info_only=True 37 | ) 38 | bilibili.download( 39 | 'https://www.bilibili.com/video/av13228063/', info_only=True 40 | ) 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/vidto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['vidto_download'] 4 | 5 | from ..common import * 6 | import pdb 7 | import time 8 | 9 | 10 | def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 11 | html = get_content(url) 12 | params = {} 13 | r = re.findall( 14 | r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html) 15 | for name, value in r: 16 | params[name] = value 17 | data = parse.urlencode(params).encode('utf-8') 18 | req = request.Request(url) 19 | print("Please wait for 6 seconds...") 20 | time.sleep(6) 21 | print("Starting") 22 | new_html = request.urlopen(req, data).read().decode('utf-8', 'replace') 23 | new_stff = re.search('lnk_download" href="(.*?)">', new_html) 24 | if(new_stff): 25 | url = new_stff.group(1) 26 | title = params['fname'] 27 | type = "" 28 | ext = "" 29 | a, b, size = url_info(url) 30 | print_info(site_info, title, type, size) 31 | if not info_only: 32 | download_urls([url], title, ext, size, output_dir, merge=merge) 33 | else: 34 | print("cannot find link, please review") 35 | pdb.set_trace() 36 | 37 | 38 | site_info = "vidto.me" 39 | download = vidto_download 
40 | download_playlist = playlist_not_supported('vidto') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/baomihua.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['baomihua_download', 'baomihua_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | import urllib 8 | 9 | def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id) 11 | host = r1(r'host=([^&]*)', html) 12 | assert host 13 | type = r1(r'videofiletype=([^&]*)', html) 14 | assert type 15 | vid = r1(r'&stream_name=([^&]*)', html) 16 | assert vid 17 | dir_str = r1(r'&dir=([^&]*)', html).strip() 18 | url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type) 19 | _, ext, size = url_info(url) 20 | print_info(site_info, title, type, size) 21 | if not info_only: 22 | download_urls([url], title, ext, size, output_dir, merge = merge) 23 | 24 | def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 25 | html = get_html(url) 26 | title = r1(r'(.*)', html) 27 | assert title 28 | id = r1(r'flvid\s*=\s*(\d+)', html) 29 | assert id 30 | baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) 31 | 32 | site_info = "baomihua.com" 33 | download = baomihua_download 34 | download_playlist = playlist_not_supported('baomihua') 35 | -------------------------------------------------------------------------------- /src/you_get/extractors/dailymotion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['dailymotion_download'] 4 | 5 | from ..common import * 6 | import urllib.parse 7 | 8 | def rebuilt_url(url): 9 | path = urllib.parse.urlparse(url).path 10 | aid = path.split('/')[-1].split('_')[0] 11 | return 
'http://www.dailymotion.com/embed/video/{}?autoplay=1'.format(aid) 12 | 13 | def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 14 | """Downloads Dailymotion videos by URL. 15 | """ 16 | 17 | html = get_content(rebuilt_url(url)) 18 | info = json.loads(match1(html, r'qualities":({.+?}),"')) 19 | title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ 20 | match1(html, r'"title"\s*:\s*"([^"]+)"') 21 | title = unicodize(title) 22 | 23 | for quality in ['1080','720','480','380','240','144','auto']: 24 | try: 25 | real_url = info[quality][1]["url"] 26 | if real_url: 27 | break 28 | except KeyError: 29 | pass 30 | 31 | mime, ext, size = url_info(real_url) 32 | 33 | print_info(site_info, title, mime, size) 34 | if not info_only: 35 | download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge) 36 | 37 | site_info = "Dailymotion.com" 38 | download = dailymotion_download 39 | download_playlist = playlist_not_supported('dailymotion') 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | PROJ_NAME = 'you-get' 4 | PACKAGE_NAME = 'you_get' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import os, json, imp 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) 11 | try: 12 | README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() 13 | except: 14 | README = "" 15 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() 16 | VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ 17 | 18 | from setuptools import setup, find_packages 19 | setup( 20 | name = proj_info['name'], 21 | version = VERSION, 22 | 23 | author = proj_info['author'], 24 | author_email = 
proj_info['author_email'], 25 | url = proj_info['url'], 26 | license = proj_info['license'], 27 | 28 | description = proj_info['description'], 29 | keywords = proj_info['keywords'], 30 | 31 | long_description = README, 32 | 33 | packages = find_packages('src'), 34 | package_dir = {'' : 'src'}, 35 | 36 | test_suite = 'tests', 37 | 38 | platforms = 'any', 39 | zip_safe = True, 40 | include_package_data = True, 41 | 42 | classifiers = proj_info['classifiers'], 43 | 44 | entry_points = {'console_scripts': proj_info['console_scripts']} 45 | ) 46 | -------------------------------------------------------------------------------- /you-get.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "you-get", 3 | "author": "Mort Yao", 4 | "author_email": "mort.yao@gmail.com", 5 | "url": "https://you-get.org/", 6 | "license": "MIT", 7 | 8 | "description": "Dumb downloader that scrapes the web", 9 | "keywords": "video download youtube youku niconico", 10 | 11 | "classifiers": [ 12 | "Development Status :: 4 - Beta", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: End Users/Desktop", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.0", 22 | "Programming Language :: Python :: 3.1", 23 | "Programming Language :: Python :: 3.2", 24 | "Programming Language :: Python :: 3.3", 25 | "Programming Language :: Python :: 3.4", 26 | "Programming Language :: Python :: 3.5", 27 | "Programming Language :: Python :: 3.6", 28 | "Topic :: Internet", 29 | "Topic :: Internet :: WWW/HTTP", 30 | "Topic :: Multimedia", 31 | "Topic :: Multimedia :: Graphics", 32 | "Topic :: Multimedia :: Sound/Audio", 33 | "Topic :: Multimedia :: Video", 34 | "Topic :: Utilities" 35 | ], 36 | 37 | 
"console_scripts": [ 38 | "you-get = you_get.__main__:main" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/yizhibo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['yizhibo_download'] 4 | 5 | from ..common import * 6 | import json 7 | import time 8 | 9 | def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | video_id = url[url.rfind('/')+1:].split(".")[0] 11 | json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) 12 | content = get_content(json_request_url) 13 | error = json.loads(content)['result'] 14 | if (error != 1): 15 | raise ValueError("Error : {}".format(error)) 16 | 17 | data = json.loads(content) 18 | title = data.get('data')['live_title'] 19 | if (title == ''): 20 | title = data.get('data')['nickname'] 21 | m3u8_url = data.get('data')['play_url'] 22 | m3u8 = get_content(m3u8_url) 23 | base_url = "/".join(data.get('data')['play_url'].split("/")[:7])+"/" 24 | part_url = re.findall(r'([0-9]+\.ts)', m3u8) 25 | real_url = [] 26 | for i in part_url: 27 | url = base_url + i 28 | real_url.append(url) 29 | print_info(site_info, title, 'ts', float('inf')) 30 | if not info_only: 31 | if player: 32 | launch_player(player, [m3u8_url]) 33 | download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge) 34 | 35 | site_info = "yizhibo.com" 36 | download = yizhibo_download 37 | download_playlist = playlist_not_supported('yizhibo') 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | `you-get` is currently experimenting with an aggressive approach to handling issues. Namely, a bug report must be addressed with some code via a pull request. 
4 | 5 | ## Report a broken extractor 6 | 7 | **How-To:** Please open a new pull request with the following changes: 8 | 9 | * Add a new test case in [tests/test.py](https://github.com/soimort/you-get/blob/develop/tests/test.py), with the failing URL(s). 10 | 11 | The Travis CI build will (ideally) fail showing a :x:, which means you have successfully reported a broken extractor. 12 | 13 | Such a valid PR will be either *closed* if it's fixed by another PR, or *merged* if it's fixed by follow-up commits from the reporters themselves. 14 | 15 | ## Report other issues / Suggest a new feature 16 | 17 | **How-To:** Please open a pull request with the proposed changes directly. 18 | 19 | A valid PR need not be complete (i.e., it can be a work in progress), but it should contain at least one sensible, nontrivial commit. 20 | 21 | ## Hints 22 | 23 | * The [`develop`](https://github.com/soimort/you-get/tree/develop) branch is where your pull request goes. 24 | * Remember to rebase. 25 | * Document your PR clearly, and if applicable, provide some sample links for reviewers to test with. 26 | * Write well-formatted, easy-to-understand commit messages. If you don't know how, look at existing ones. 27 | * We will not ask you to sign a CLA, but you must ensure that your code can be legally redistributed (under the terms of the MIT license). 
28 | -------------------------------------------------------------------------------- /src/you_get/extractors/naver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 10 | 11 | __all__ = ['naver_download_by_url'] 12 | 13 | 14 | def naver_download_by_url(url, info_only=False, **kwargs): 15 | ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' 16 | page = get_content(url) 17 | og_video_url = re.search(r"", page).group(1) 18 | params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query) 19 | vid = params_dict['vid'][0] 20 | key = params_dict['outKey'][0] 21 | meta_str = get_content(ep.format(vid, key)) 22 | meta_json = json.loads(meta_str) 23 | if 'errorCode' in meta_json: 24 | log.wtf(meta_json['errorCode']) 25 | title = meta_json['meta']['subject'] 26 | videos = meta_json['videos']['list'] 27 | video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) 28 | video_url = video_list[-1]['source'] 29 | # size = video_list[-1]['size'] 30 | # result wrong size 31 | size = url_size(video_url) 32 | print_info(site_info, title, 'mp4', size) 33 | if not info_only: 34 | download_urls([video_url], title, 'mp4', size, **kwargs) 35 | 36 | site_info = "naver.com" 37 | download = naver_download_by_url 38 | download_playlist = playlist_not_supported('naver') 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuaishou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, 
playlist_not_supported, url_size 10 | 11 | __all__ = ['kuaishou_download_by_url'] 12 | 13 | 14 | def kuaishou_download_by_url(url, info_only=False, **kwargs): 15 | page = get_content(url) 16 | # size = video_list[-1]['size'] 17 | # result wrong size 18 | try: 19 | og_video_url = re.search(r"", page).group(1) 20 | video_url = og_video_url 21 | title = url.split('/')[-1] 22 | size = url_size(video_url) 23 | video_format = video_url.split('.')[-1] 24 | print_info(site_info, title, video_format, size) 25 | if not info_only: 26 | download_urls([video_url], title, video_format, size, **kwargs) 27 | except:# extract image 28 | og_image_url = re.search(r"", page).group(1) 29 | image_url = og_image_url 30 | title = url.split('/')[-1] 31 | size = url_size(image_url) 32 | image_format = image_url.split('.')[-1] 33 | print_info(site_info, title, image_format, size) 34 | if not info_only: 35 | download_urls([image_url], title, image_format, size, **kwargs) 36 | 37 | site_info = "kuaishou.com" 38 | download = kuaishou_download_by_url 39 | download_playlist = playlist_not_supported('kuaishou') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/suntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['suntv_download'] 4 | 5 | from ..common import * 6 | import urllib 7 | import re 8 | 9 | def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.isuntv.com/\w+', url): 11 | API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" 12 | 13 | itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') 14 | values = {"itemid" : itemid, "vodid": ""} 15 | 16 | data = str(values).replace("'", '"') 17 | data = data.encode('utf-8') 18 | req = urllib.request.Request(API_URL, data) 19 | req.add_header('AjaxPro-Method', 'ToPlay') #important! 
20 | resp = urllib.request.urlopen(req) 21 | respData = resp.read() 22 | respData = respData.decode('ascii').strip('"') #Ahhhhhhh! 23 | 24 | video_url = 'http://www.isuntv.com' + str(respData) 25 | 26 | html = get_content(url, decoded=False) 27 | html = html.decode('gbk') 28 | title = match1(html, '([^<]+)').strip() #get rid of \r\n s 29 | 30 | type_ = '' 31 | size = 0 32 | type, ext, size = url_info(video_url) 33 | 34 | print_info(site_info, title, type, size) 35 | if not info_only: 36 | download_urls([url], title, 'mp4', size, output_dir, merge=merge) 37 | 38 | site_info = "SunTV" 39 | download = suntv_download 40 | download_playlist = playlist_not_supported('suntv') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/iwara.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | __all__ = ['iwara_download'] 3 | from ..common import * 4 | headers = { 5 | 'DNT': '1', 6 | 'Accept-Encoding': 'gzip, deflate, sdch, br', 7 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 8 | 'Upgrade-Insecure-Requests': '1', 9 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36', 10 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 11 | 'Cache-Control': 'max-age=0', 12 | 13 | 'Connection': 'keep-alive', 14 | 'Save-Data': 'on', 15 | 'Cookie':'has_js=1;show_adult=1', 16 | } 17 | 18 | def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 19 | global headers 20 | video_hash=match1(url, r'http://\w+.iwara.tv/videos/(\w+)') 21 | video_url=match1(url, r'(http://\w+.iwara.tv)/videos/\w+') 22 | html = get_content(url,headers=headers) 23 | title = r1(r'<title>(.*)', html) 24 | api_url=video_url+'/api/video/'+video_hash 25 | content=get_content(api_url,headers=headers) 26 | data=json.loads(content) 27 | 
type,ext,size=url_info(data[0]['uri'], headers=headers) 28 | down_urls=data[0]['uri'] 29 | print_info(down_urls,title+data[0]['resolution'],type,size) 30 | 31 | if not info_only: 32 | download_urls([down_urls], title, ext, size, output_dir, merge = merge,headers=headers) 33 | 34 | site_info = "iwara" 35 | download = iwara_download 36 | download_playlist = playlist_not_supported('iwara') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/kuwo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['kuwo_download'] 4 | 5 | from ..common import * 6 | import re 7 | 8 | def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False): 9 | html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid) 10 | title=match1(html,r"(.*)") 11 | #to get title 12 | #format =aac|mp3 ->to get aac format=mp3 ->to get mp3 13 | url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid) 14 | songtype, ext, size = url_info(url) 15 | print_info(site_info, title, songtype, size) 16 | if not info_only: 17 | download_urls([url], title, ext, size, output_dir) 18 | 19 | def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | html=get_content(url) 21 | matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated 22 | for rid in matched: 23 | kuwo_download_by_rid(rid,output_dir,merge,info_only) 24 | 25 | 26 | 27 | def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if "www.kuwo.cn/yinyue" in url: 29 | rid=match1(url,'yinyue/(\d+)') 30 | kuwo_download_by_rid(rid,output_dir, merge, info_only) 31 | else: 32 | kuwo_playlist_download(url,output_dir,merge,info_only) 33 | 34 | site_info = "kuwo.cn" 35 | download = kuwo_download 36 | # download_playlist = playlist_not_supported("kugou") 37 | 
# download_playlist=playlist_not_supported("kuwo") 38 | download_playlist=kuwo_playlist_download 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/veoh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['veoh_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | 8 | def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 9 | '''Get item_id''' 10 | if re.match(r'http://www.veoh.com/watch/\w+', url): 11 | item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') 12 | elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): 13 | item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') 14 | else: 15 | raise NotImplementedError('Cannot find item ID') 16 | veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs) 17 | 18 | #---------------------------------------------------------------------- 19 | def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): 20 | """Source: Android mobile""" 21 | webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) 22 | 23 | #grab download URL 24 | a = get_content(webpage_url, decoded=True) 25 | url = match1(a, r'(?:)?', xml) 21 | urls = re.findall(r']*>(?:)?', xml) 22 | hostpath = r1(r']*>(?:)?', xml) 23 | 24 | return name, urls, hostpath 25 | 26 | def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 27 | channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) 28 | program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) 29 | volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) 30 | 31 | title, urls, hostpath = video_info(channel_id, program_id, volumn_id) 32 | urls = [hostpath + url for url in urls] 33 | 34 | size = 0 35 | for url in urls: 36 | _, ext, temp = 
url_info(url) 37 | size += temp 38 | 39 | print_info(site_info, title, ext, size) 40 | if not info_only: 41 | download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) 42 | 43 | site_info = "Joy.cn" 44 | download = joy_download 45 | download_playlist = playlist_not_supported('joy') 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure these boxes are checked before submitting your issue – thank you! 2 | 3 | - [ ] You can actually watch the video in your browser or mobile application, but not download them with `you-get`. 4 | - [ ] Your `you-get` is up-to-date. 5 | - [ ] I have read and tried to do so. 6 | - [ ] The issue is not yet reported on or . If so, please add your comments under the existing issue. 7 | - [ ] The issue (or question) is really about `you-get`, not about some other code or project. 8 | 9 | Run the command with the `--debug` option, and paste the full output inside the fences: 10 | 11 | ``` 12 | [PASTE IN ME] 13 | ``` 14 | 15 | If there's anything else you would like to say (e.g. in case your issue is not about downloading a specific video; it might as well be a general discussion or proposal for a new feature), fill in the box below; otherwise, you may want to post an emoji or meme instead: 16 | 17 | > [WRITE SOMETHING] 18 | > [OR HAVE SOME :icecream:!] 19 | 20 | 汉语翻译最终日期:2016年02月26日 21 | 22 | 在提交前,请确保您已经检查了以下内容! 23 | 24 | - [ ] 你可以在浏览器或移动端中观看视频,但不能使用`you-get`下载. 25 | - [ ] 您的`you-get`为最新版. 26 | - [ ] 我已经阅读并按 中的指引进行了操作. 27 | - [ ] 您的问题没有在 , 报告,否则请在原有issue下报告. 28 | - [ ] 本问题确实关于`you-get`, 而不是其他项目. 29 | 30 | 请使用`--debug`运行,并将输出粘贴在下面: 31 | 32 | ``` 33 | [在这里粘贴完整日志] 34 | ``` 35 | 36 | 如果您有其他附言,例如问题只在某个视频发生,或者是一般性讨论或者提出新功能,请在下面添加;或者您可以卖个萌: 37 | 38 | > [您的内容] 39 | > [舔 :icecream:!] 
40 | -------------------------------------------------------------------------------- /src/you_get/extractors/panda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['panda_download'] 4 | 5 | from ..common import * 6 | from ..util.log import * 7 | import json 8 | import time 9 | 10 | def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 11 | roomid = re.search('/(\d+)', url) 12 | if roomid is None: 13 | log.wtf('Cannot found room id for this url') 14 | roomid = roomid.group(1) 15 | json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time())) 16 | content = get_html(json_request_url) 17 | api_json = json.loads(content) 18 | 19 | errno = api_json["errno"] 20 | errmsg = api_json["errmsg"] 21 | if errno: 22 | raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg)) 23 | data = api_json["data"] 24 | title = data["roominfo"]["name"] 25 | room_key = data["videoinfo"]["room_key"] 26 | plflag = data["videoinfo"]["plflag"].split("_") 27 | status = data["videoinfo"]["status"] 28 | if status is not "2": 29 | raise ValueError("The live stream is not online! 
(status:%s)" % status) 30 | 31 | data2 = json.loads(data["videoinfo"]["plflag_list"]) 32 | rid = data2["auth"]["rid"] 33 | sign = data2["auth"]["sign"] 34 | ts = data2["auth"]["time"] 35 | real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid) 36 | 37 | print_info(site_info, title, 'flv', float('inf')) 38 | if not info_only: 39 | download_urls([real_url], title, 'flv', None, output_dir, merge = merge) 40 | 41 | site_info = "panda.tv" 42 | download = panda_download 43 | download_playlist = playlist_not_supported('panda') 44 | -------------------------------------------------------------------------------- /src/you_get/extractors/videomega.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['videomega_download'] 4 | 5 | from ..common import * 6 | import ssl 7 | 8 | def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | # Hot-plug cookie handler 10 | ssl_context = request.HTTPSHandler( 11 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 12 | cookie_handler = request.HTTPCookieProcessor() 13 | opener = request.build_opener(ssl_context, cookie_handler) 14 | opener.addheaders = [('Referer', url), 15 | ('Cookie', 'noadvtday=0')] 16 | request.install_opener(opener) 17 | 18 | if re.search(r'view\.php', url): 19 | php_url = url 20 | else: 21 | content = get_content(url) 22 | m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content) 23 | ref = m.group(1) 24 | width, height = m.group(2), m.group(3) 25 | php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height) 26 | content = get_content(php_url) 27 | 28 | title = match1(content, r'(.*)') 29 | js = match1(content, r'(eval.*)') 30 | t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)') 31 | t = re.sub(r'(\w)', r'{\1}', t) 32 | t = t.translate({87 + i: str(i) for i in range(10, 36)}) 33 | s = match1(js, 
r"'([^']+)'\.split").split('|') 34 | src = t.format(*s) 35 | 36 | type, ext, size = url_info(src, faker=True) 37 | 38 | print_info(site_info, title, type, size) 39 | if not info_only: 40 | download_urls([src], title, ext, size, output_dir, merge=merge, faker=True) 41 | 42 | site_info = "Videomega.tv" 43 | download = videomega_download 44 | download_playlist = playlist_not_supported('videomega') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/qingting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from ..common import get_content, playlist_not_supported, url_size 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | 8 | __all__ = ['qingting_download_by_url'] 9 | 10 | 11 | class Qingting(VideoExtractor): 12 | # every resource is described by its channel id and program id 13 | # so vid is tuple (chaanel_id, program_id) 14 | 15 | name = 'Qingting' 16 | stream_types = [ 17 | {'id': '_default'} 18 | ] 19 | 20 | ep = 'http://i.qingting.fm/wapi/channels/{}/programs/{}' 21 | file_host = 'http://od.qingting.fm/{}' 22 | mobile_pt = r'channels\/(\d+)\/programs/(\d+)' 23 | 24 | def prepare(self, **kwargs): 25 | if self.vid is None: 26 | hit = re.search(self.__class__.mobile_pt, self.url) 27 | self.vid = (hit.group(1), hit.group(2)) 28 | 29 | ep_url = self.__class__.ep.format(self.vid[0], self.vid[1]) 30 | meta = json.loads(get_content(ep_url)) 31 | 32 | if meta['code'] != 0: 33 | log.wtf(meta['message']['errormsg']) 34 | 35 | file_path = self.__class__.file_host.format(meta['data']['file_path']) 36 | self.title = meta['data']['name'] 37 | duration = str(meta['data']['duration']) + 's' 38 | 39 | self.streams['_default'] = {'src': [file_path], 'video_profile': duration, 'container': 'm4a'} 40 | 41 | def extract(self, **kwargs): 42 | self.streams['_default']['size'] = url_size(self.streams['_default']['src'][0]) 43 | 44 | 45 
| def qingting_download_by_url(url, **kwargs): 46 | Qingting().download_by_url(url, **kwargs) 47 | 48 | site_info = 'Qingting' 49 | download = qingting_download_by_url 50 | download_playlist = playlist_not_supported('Qingting') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/soundcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['soundcloud_download', 'soundcloud_download_by_id'] 4 | 5 | from ..common import * 6 | import json 7 | import urllib.error 8 | 9 | client_id = 'WKcQQdEZw7Oi01KqtHWxeVSxNyRzgT8M' 10 | 11 | def soundcloud_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): 12 | assert title 13 | url = 'https://api.soundcloud.com/tracks/{}/{}?client_id={}'.format(id, 'stream', client_id) 14 | 15 | type, ext, size = url_info(url) 16 | 17 | print_info(site_info, title, type, size) 18 | 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge = merge) 21 | 22 | def soundcloud_i1_api(track_id): 23 | url = 'https://api.soundcloud.com/i1/tracks/{}/streams?client_id={}'.format(track_id, client_id) 24 | return json.loads(get_content(url))['http_mp3_128_url'] 25 | 26 | def soundcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 27 | url = 'https://api.soundcloud.com/resolve.json?url={}&client_id={}'.format(url, client_id) 28 | metadata = get_content(url) 29 | info = json.loads(metadata) 30 | title = info["title"] 31 | real_url = info.get('download_url') 32 | if real_url is None: 33 | real_url = info.get('steram_url') 34 | if real_url is None: 35 | raise Exception('Cannot get media URI for {}'.format(url)) 36 | real_url = soundcloud_i1_api(info['id']) 37 | mime, ext, size = url_info(real_url) 38 | print_info(site_info, title, mime, size) 39 | if not info_only: 40 | download_urls([real_url], title, ext, size, output_dir, merge=merge) 41 | 42 | 
site_info = "SoundCloud.com" 43 | download = soundcloud_download 44 | download_playlist = playlist_not_supported('soundcloud') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/mtv81.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mtv81_download'] 4 | 5 | from ..common import * 6 | 7 | from xml.dom.minidom import parseString 8 | 9 | from html.parser import HTMLParser 10 | 11 | 12 | def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | html = get_content(url) 14 | title = HTMLParser().unescape( 15 | "|".join(match1(html, r"(.*?)").split("|")[:-2])) 16 | 17 | # mgid%3Auma%3Avideo%3Amtv81.com%3A897974 18 | vid = match1(html, r'getTheVideo\("(.*?)"') 19 | xml = parseString( 20 | get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid))) 21 | 22 | url = sorted( 23 | map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")), 24 | key=lambda x: int(match1(x, r'_(\d+?)_')))[-1] 25 | 26 | mediatype, ext, size = 'mp4', 'mp4', 0 27 | print_info(site_info, title, mediatype, size) 28 | # 29 | # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf 30 | # 31 | # because rtmpdump is unstable,may try serveral times 32 | # 33 | if not info_only: 34 | # import pdb 35 | # pdb.set_trace() 36 | download_rtmp_url(url=url, title=title, ext=ext, params={ 37 | "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir) 38 | 39 | 40 | site_info = "mtv81.com" 41 | download = mtv81_download 42 | download_playlist = playlist_not_supported('mtv81') 43 | 
-------------------------------------------------------------------------------- /src/you_get/extractors/qq_egame.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from ..common import get_content 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | from ..util.strings import unescape_html 8 | 9 | __all__ = ['qq_egame_download'] 10 | 11 | 12 | class QQEgame(VideoExtractor): 13 | stream_types = [ 14 | {'id': 'original', 'video_profile': '0', 'container': 'flv'}, 15 | {'id': '900', 'video_profile': '900kb/s', 'container': 'flv'}, 16 | {'id': '550', 'video_profile': '550kb/s', 'container': 'flv'} 17 | ] 18 | name = 'QQEgame' 19 | 20 | def prepare(self, **kwargs): 21 | page = get_content(self.url) 22 | server_data = re.search(r'serverData\s*=\s*({.+?});', page) 23 | if server_data is None: 24 | log.wtf('cannot find server_data') 25 | json_data = json.loads(server_data.group(1)) 26 | live_info = json_data['liveInfo']['data'] 27 | self.title = '{}_{}'.format(live_info['profileInfo']['nickName'], live_info['videoInfo']['title']) 28 | for exsited_stream in live_info['videoInfo']['streamInfos']: 29 | for s in self.__class__.stream_types: 30 | if re.search(r'(\d+)', s['video_profile']).group(1) == exsited_stream['bitrate']: 31 | current_stream_id = s['id'] 32 | stream_info = dict(src=[unescape_html(exsited_stream['playUrl'])]) 33 | stream_info['video_profile'] = exsited_stream['desc'] 34 | stream_info['container'] = s['container'] 35 | stream_info['size'] = float('inf') 36 | self.streams[current_stream_id] = stream_info 37 | 38 | 39 | def qq_egame_download(url, **kwargs): 40 | QQEgame().download_by_url(url, **kwargs) 41 | # url dispatching has been done in qq.py 42 | -------------------------------------------------------------------------------- /src/you_get/extractors/pinterest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
#!/usr/bin/env python

import os.path
import shlex
import subprocess

def get_usable_rtmpdump(cmd):
    """Return *cmd* if it can be launched as a process, else None.

    Used to probe whether an rtmpdump binary is available on PATH.
    """
    try:
        p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.communicate()
        return cmd
    except OSError:
        # Binary missing or not executable; anything else should propagate.
        return None

RTMPDUMP = get_usable_rtmpdump('rtmpdump')

def has_rtmpdump_installed():
    """True if an rtmpdump binary was found at import time."""
    return RTMPDUMP is not None

# params = {"-y": "playlist", "-q": None}
# If the entry is a bare flag, its value should be None.
# -r and -o must not be included in params.

def download_rtmpdump_stream(url, title, ext, params=None, output_dir='.'):
    """Download an RTMP stream to '<output_dir>/<title>.<ext>' via rtmpdump.

    params maps extra rtmpdump options to values (None for bare flags).
    """
    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)

    cmdline = [RTMPDUMP, '-r', url, '-o', filepath]

    # NOTE: params defaults to None (not {}) to avoid the shared mutable
    # default-argument pitfall.
    for key, value in (params or {}).items():
        cmdline.append(key)
        if value is not None:
            cmdline.append(value)

    print("Call rtmpdump:\n"+" ".join(cmdline)+"\n")
    subprocess.call(cmdline)
    return

#
def play_rtmpdump_stream(player, url, params=None):
    """Pipe an RTMP stream from rtmpdump straight into *player*'s stdin.

    BUGFIX: the previous implementation appended a literal '|' to a single
    argv list; subprocess does not interpret shell syntax, so rtmpdump
    received '|', the player name and '-' as bogus arguments and no pipe was
    ever created.  The two processes are now chained explicitly.
    """
    cmdline = [RTMPDUMP, '-r', url]

    for key, value in (params or {}).items():
        cmdline.append(key)
        if value is not None:
            cmdline.append(value)

    # Dump to stdout so the player can read from it.
    cmdline.append('-o')
    cmdline.append('-')

    # logging
    print("Call rtmpdump:\n"+" ".join(cmdline + ['|', player, '-'])+"\n")

    # player may carry its own options (e.g. "mplayer -fs"); split it
    # shell-style before appending '-' (read from stdin).
    player_cmdline = shlex.split(player) + ['-']

    dumper = subprocess.Popen(cmdline, stdout=subprocess.PIPE)
    try:
        subprocess.call(player_cmdline, stdin=dumper.stdout)
    finally:
        if dumper.stdout is not None:
            dumper.stdout.close()  # let rtmpdump see SIGPIPE if player exits
        dumper.wait()
    return
#!/usr/bin/env python

__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']

from ..common import *

def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Resolve video *vid* through the insite API and download its best-quality URL."""
    video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
    models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
    # Highest qualityLevel last; keep sorted()[-1] (stable) rather than max().
    best = sorted(models, key=lambda m: m['qualityLevel'])[-1]
    url = best['videoUrl']
    type = ext = r1(r'\.(flv|mp4)', url)
    _, _, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)

def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Handle a single-video page; fall back to the playlist handler when no id is found."""
    vid = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url) or \
        r1(r'http://\w+.yinyuetai.com/video/h5/(\d+)', url)
    if not vid:
        yinyuetai_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    html = get_html(url, 'utf-8')
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html)
    assert title
    title = escape_file_path(parse.unquote(title))
    yinyuetai_download_by_id(vid, title, output_dir, merge=merge, info_only=info_only)

def yinyuetai_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every video referenced by data-id attributes on a playlist page."""
    playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url)
    html = get_html(url)
    for data_id in re.findall(r'data-index="\d+"\s*data-id=(\d+)', html):
        yinyuetai_download('http://v.yinyuetai.com/video/' + data_id,
                           output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "YinYueTai.com"
download = yinyuetai_download
download_playlist = yinyuetai_download_playlist
#!/usr/bin/env python

__all__ = ['fantasy_download']

from ..common import *
import json
import random
from urllib.parse import urlparse, parse_qs


def fantasy_download_by_id_channelId(id = 0, channelId = 0, output_dir = '.', merge = True, info_only = False,
                                     **kwargs):
    """Query the playDetails API for (id, channelId) and download the returned video."""
    api_url = 'http://www.fantasy.tv/tv/playDetails.action?' \
              'myChannelId=1&id={id}&channelId={channelId}&t={t}'.format(id = id,
                                                                         channelId = channelId,
                                                                         t = str(random.random())
                                                                         )
    data = json.loads(get_content(api_url))

    if int(data['status']) != 100000:
        raise Exception('API error!')

    title = data['data']['tv']['title']
    video_url = data['data']['tv']['videoPath']

    # The CDN checks Referer; reuse the API URL as the referer header.
    headers = fake_headers.copy()
    headers['Referer'] = api_url
    type, ext, size = url_info(video_url, headers=headers)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir, merge = merge, headers = headers)


def fantasy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Entry point: pull tvId/channelId out of the page URL's query string."""
    if 'fantasy.tv' not in url:
        raise Exception('Wrong place!')

    query = parse_qs(urlparse(url).query)

    if 'tvId' not in query or 'channelId' not in query:
        raise Exception('No enough arguments!')

    fantasy_download_by_id_channelId(id = query['tvId'][0], channelId = query['channelId'][0],
                                     output_dir = output_dir, merge = merge,
                                     info_only = info_only, **kwargs)


site_info = "fantasy.tv"
download = fantasy_download
download_playlist = playlist_not_supported('fantasy.tv')
#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

import ssl

class Infoq(VideoExtractor):
    # Extractor for InfoQ presentation pages; the video stream, the mp3
    # audio and the PDF slides are exposed as three separate "streams".
    name = "InfoQ"

    stream_types = [
        {'id': 'video'},
        {'id': 'audio'},
        {'id': 'slides'}
    ]

    def prepare(self, **kwargs):
        """Scrape the page and register up to three streams (video/audio/slides).

        Side effect: installs a process-global urllib opener carrying the
        CloudFront signed-cookie values so later url_info()/download calls
        can fetch the media.
        """
        content = get_content(self.url)
        self.title = match1(content, r'<title>([^<]+)')
        # P.s holds the raw video URL; InfoQConstants.sc* are the three
        # CloudFront signed-cookie parts (policy / signature / key-pair id).
        s = match1(content, r'P\.s\s*=\s*\'([^\']+)\'')
        scp = match1(content, r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'')
        scs = match1(content, r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'')
        sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'')

        mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"')
        if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3

        pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"')
        if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf

        # cookie handler
        # NOTE(review): pins the connection to TLSv1 explicitly — presumably
        # required by the media CDN when this was written; confirm before
        # modernizing.
        ssl_context = request.HTTPSHandler(
            context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
        cookie_handler = request.HTTPCookieProcessor()
        opener = request.build_opener(ssl_context, cookie_handler)
        opener.addheaders = [
            ('Referer', self.url),
            ('Cookie',
             'CloudFront-Policy=%s;CloudFront-Signature=%s;CloudFront-Key-Pair-Id=%s' % (scp, scs, sck))
        ]
        # Global side effect: this opener is used by all subsequent urllib
        # requests in the process.
        request.install_opener(opener)

        if s: self.streams['video'] = {'url': s }
        if mp3: self.streams['audio'] = { 'url': mp3 }
        if pdf: self.streams['slides'] = { 'url': pdf }

    def extract(self, **kwargs):
        """Fill in container, size and src for every stream found by prepare()."""
        for i in self.streams:
            s = self.streams[i]
            _, s['container'], s['size'] = url_info(s['url'])
            s['src'] = [s['url']]

site = Infoq()
download = site.download_by_url
download_playlist = site.download_by_url
#!/usr/bin/env python

__all__ = ['nanagogo_download']

from ..common import *
from .universal import *

def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every image and HQ movie attached to a 7gogo.jp talk post."""
    if re.match(r'https?://stat.7gogo.jp', url):
        # Direct static-asset URL: hand off to the universal extractor.
        universal_download(url, output_dir, merge=merge, info_only=info_only)
        return

    talk_id = r1(r'7gogo.jp/([^/]+)/', url)
    post_id = r1(r'7gogo.jp/[^/]+/(\d+)', url)
    title = '%s_%s' % (talk_id, post_id)
    api_url = 'https://api.7gogo.jp/web/v2/talks/%s/posts/%s' % (talk_id, post_id)
    info = json.loads(get_content(api_url))

    post = info['data']['posts']['post']
    if post is None:
        return
    if post['body'] is None:
        return

    items = []
    for entry in post['body']:
        if 'image' in entry:
            image_url = entry['image']
            if image_url[:2] == '//':
                continue  # skip stamp images
            _, ext, size = url_info(image_url)
            items.append({'title': title,
                          'url': image_url,
                          'ext': ext,
                          'size': size})
        elif 'movieUrlHq' in entry:
            movie_url = entry['movieUrlHq']
            _, ext, size = url_info(movie_url)
            items.append({'title': title,
                          'url': movie_url,
                          'ext': ext,
                          'size': size})

    size = sum(item['size'] for item in items)
    if size == 0:
        return  # do not fail the whole process
    print_info(site_info, title, ext, size)
    if not info_only:
        for item in items:
            print_info(site_info, item['title'], item['ext'], item['size'])
            download_urls([item['url']], item['title'], item['ext'], item['size'],
                          output_dir=output_dir,
                          merge=merge)

site_info = "7gogo.jp"
download = nanagogo_download
download_playlist = playlist_not_supported('nanagogo')
def sina_xml_to_url_list(xml_data):
    """str -> list

    Collect the text of every <durl>/<url> element in *xml_data*.
    From Biligrab.
    """
    dom = parseString(xml_data)
    return [
        durl.getElementsByTagName('url')[0].childNodes[0].data
        for durl in dom.getElementsByTagName('durl')
    ]
#!/usr/bin/env python

__all__ = ['kugou_download']

from ..common import *
from json import loads
from base64 import b64decode
import re
import hashlib


def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
    """Download a song from 5sing.kugou.com, or treat any other kugou URL as a playlist."""
    if '5sing' in url.lower():
        # for 5sing.kugou.com
        html = get_html(url)
        ticket = r1(r'"ticket":\s*"(.*)"', html)
        # The ticket is base64-encoded JSON carrying the file URL and song name.
        j = loads(str(b64decode(ticket), encoding="utf-8"))
        url = j['file']
        title = j['songName']
        songtype, ext, size = url_info(url)
        print_info(site_info, title, songtype, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)
    else:
        # for the www.kugou.com/
        return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)


def kugou_download_by_hash(title, hash_val, output_dir='.', merge=True, info_only=False):
    """Resolve a kugou track hash to a direct URL and download it.

    Sample page: http://www.kugou.com/yy/album/single/536957.html
    Key derivation (recovered from the site's swf): key = md5(hash + "kgcloud").
    cmd=4 requests mp3, cmd=3 would request m4a.
    """
    key = hashlib.md5((hash_val + "kgcloud").encode("utf-8")).hexdigest()
    html = get_html("http://trackercdn.kugou.com/i/?pid=6&key=%s&acceptMp3=1&cmd=4&hash=%s" % (key, hash_val))
    j = loads(html)
    url = j['url']
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)


def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every (title, hash) pair scraped from an album/playlist page."""
    html = get_html(url)
    # Raw string: the old non-raw literal relied on '\w' surviving unescaped,
    # which raises DeprecationWarning on modern Python. Pattern text unchanged.
    pattern = re.compile(r'title="(.*?)".* data="(\w*)\|.*?"')
    for title, hash_val in pattern.findall(html):
        kugou_download_by_hash(title, hash_val, output_dir, merge, info_only)


site_info = "kugou.com"
download = kugou_download
download_playlist = kugou_download_playlist
'Accept-Language': 'en-US,en;q=0.8', 14 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' 15 | } 16 | 17 | def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs): 18 | '''Source: Android mobile''' 19 | page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4' 20 | 21 | mobile_page = get_content(page_url, headers=fake_headers_mobile) 22 | url = match1(mobile_page, r'