├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── CHANGELOG.rst ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── README.rst ├── SECURITY.md ├── contrib └── completion │ ├── _you-get │ ├── you-get-completion.bash │ └── you-get.fish ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── you_get │ ├── __init__.py │ ├── __main__.py │ ├── cli_wrapper │ ├── __init__.py │ ├── downloader │ │ └── __init__.py │ ├── openssl │ │ └── __init__.py │ ├── player │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── dragonplayer.py │ │ ├── gnome_mplayer.py │ │ ├── mplayer.py │ │ ├── vlc.py │ │ └── wmp.py │ └── transcoder │ │ ├── __init__.py │ │ ├── ffmpeg.py │ │ ├── libav.py │ │ └── mencoder.py │ ├── common.py │ ├── extractor.py │ ├── extractors │ ├── __init__.py │ ├── acfun.py │ ├── alive.py │ ├── archive.py │ ├── baidu.py │ ├── bandcamp.py │ ├── baomihua.py │ ├── bigthink.py │ ├── bilibili.py │ ├── bokecc.py │ ├── cbs.py │ ├── ckplayer.py │ ├── cntv.py │ ├── coub.py │ ├── dailymotion.py │ ├── douban.py │ ├── douyin.py │ ├── douyutv.py │ ├── ehow.py │ ├── embed.py │ ├── facebook.py │ ├── fc2video.py │ ├── flickr.py │ ├── freesound.py │ ├── funshion.py │ ├── giphy.py │ ├── google.py │ ├── heavymusic.py │ ├── huomaotv.py │ ├── icourses.py │ ├── ifeng.py │ ├── imgur.py │ ├── infoq.py │ ├── instagram.py │ ├── interest.py │ ├── iqilu.py │ ├── iqiyi.py │ ├── iwara.py │ ├── ixigua.py │ ├── joy.py │ ├── kakao.py │ ├── khan.py │ ├── ku6.py │ ├── kuaishou.py │ ├── kugou.py │ ├── kuwo.py │ ├── le.py │ ├── lizhi.py │ ├── longzhu.py │ ├── lrts.py │ ├── magisto.py │ ├── metacafe.py │ ├── mgtv.py │ ├── miaopai.py │ ├── miomio.py │ ├── missevan.py │ ├── mixcloud.py │ ├── mtv81.py │ ├── nanagogo.py │ ├── naver.py │ ├── netease.py │ ├── nicovideo.py │ ├── pinterest.py │ ├── pixnet.py │ ├── pptv.py │ ├── qie.py │ ├── qie_video.py │ ├── qingting.py │ ├── qq.py │ ├── qq_egame.py │ ├── showroom.py │ ├── sina.py │ ├── sohu.py │ ├── soundcloud.py │ ├── suntv.py │ ├── ted.py │ ├── theplatform.py │ ├── tiktok.py │ ├── toutiao.py │ ├── tucao.py │ ├── tudou.py │ ├── tumblr.py │ ├── twitter.py │ ├── ucas.py │ ├── universal.py │ ├── veoh.py │ ├── vimeo.py │ ├── vk.py │ ├── w56.py │ ├── wanmen.py │ ├── ximalaya.py │ ├── xinpianchang.py │ ├── yixia.py │ ├── yizhibo.py │ ├── youku.py │ ├── youtube.py │ ├── zhanqi.py │ ├── zhibo.py │ └── zhihu.py │ ├── json_output.py │ ├── processor │ ├── __init__.py │ ├── ffmpeg.py │ ├── join_flv.py │ ├── join_mp4.py │ ├── join_ts.py │ └── rtmpdump.py │ ├── util │ ├── __init__.py │ ├── fs.py │ ├── git.py │ ├── log.py │ ├── os.py │ ├── strings.py │ └── term.py │ └── version.py ├── tests ├── __init__.py ├── test.py ├── test_common.py └── test_util.py ├── you-get ├── you-get.json └── you-get.plugin.zsh /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | 3 | name: develop 4 | 5 | on: 6 | push: 7 | branches: [ develop ] 8 | pull_request: 9 | branches: [ develop ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9, '3.10', '3.11', '3.12', '3.13', pypy-3.8, pypy-3.9, pypy-3.10] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: 
Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip setuptools 29 | pip install flake8 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --ignore=F824 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with unittest 38 | run: | 39 | make test 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Misc 62 | _* 63 | *_ 64 | *.3gp 65 | *.asf 66 | *.download 67 | *.f4v 68 | *.flv 69 | *.gif 70 | *.html 71 | *.jpg 72 | *.lrc 73 | *.mkv 74 | *.mp3 75 | *.mp4 76 | *.mpg 77 | *.png 78 | *.srt 79 | *.ts 80 | *.webm 81 | *.xml 82 | *.json 83 | /.env 84 | /.idea 85 | *.m4a 86 | *.DS_Store 87 | *.txt 88 | *.sw[a-p] 89 | 90 | *.zip 91 | 92 | .emacs* 93 | .vscode 94 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Report an Issue 2 | 3 | If you would like to report a problem you find when using `you-get`, please open a [Pull Request](https://github.com/soimort/you-get/pulls), which should include: 4 | 5 | 1. A detailed description of the encountered problem; 6 | 2. At least one commit, addressing the problem through some unit test(s). 7 | * Examples of good commits: [#2675](https://github.com/soimort/you-get/pull/2675/files), [#2680](https://github.com/soimort/you-get/pull/2680/files), [#2685](https://github.com/soimort/you-get/pull/2685/files) 8 | 9 | PRs that fail to meet the above criteria may be closed summarily with no further action. 10 | 11 | A valid PR will remain open until its addressed problem is fixed. 12 | 13 | 14 | 15 | # 如何汇报问题 16 | 17 | 为了防止对 GitHub Issues 的滥用,本项目不接受一般的 Issue。 18 | 19 | 如您在使用 `you-get` 的过程中发现任何问题,请开启一个 [Pull Request](https://github.com/soimort/you-get/pulls)。该 PR 应当包含: 20 | 21 | 1. 详细的问题描述; 22 | 2. 
至少一个 commit,其内容是**与问题相关的**单元测试。**不要通过随意修改无关文件的方式来提交 PR!** 23 | * 有效的 commit 示例:[#2675](https://github.com/soimort/you-get/pull/2675/files), [#2680](https://github.com/soimort/you-get/pull/2680/files), [#2685](https://github.com/soimort/you-get/pull/2685/files) 24 | 25 | 不符合以上条件的 PR 可能被直接关闭。 26 | 27 | 有效的 PR 将会被一直保留,直至相应的问题得以修复。 28 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2012-2024 Mort Yao and other contributors 4 | (https://github.com/soimort/you-get/graphs/contributors) 5 | Copyright (c) 2012 Boyu Guo 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include *.txt 3 | include Makefile 4 | include CONTRIBUTING.md 5 | include README.md 6 | include you-get 7 | include you-get.json 8 | include you-get.plugin.zsh 9 | recursive-include contrib * 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default i test clean all html rst build install release 2 | 3 | default: i 4 | 5 | i: 6 | @(cd src; python -i -c 'import you_get; print("You-Get %s\n>>> import you_get" % you_get.version.__version__)') 7 | 8 | test: 9 | (cd src; python -m unittest discover -s ../tests) 10 | 11 | clean: 12 | zenity --question 13 | rm -fr build/ dist/ src/*.egg-info/ 14 | find . | grep __pycache__ | xargs rm -fr 15 | find . | grep .pyc | xargs rm -f 16 | 17 | all: build 18 | 19 | html: 20 | pandoc README.md > README.html 21 | 22 | rst: 23 | pandoc -s -t rst README.md > README.rst 24 | 25 | build: 26 | python -m build 27 | 28 | install: 29 | python -m pip install . 
30 | 31 | release: build 32 | @echo 'Upload new version to PyPI using:' 33 | @echo ' twine upload --sign dist/you_get-VERSION*' 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | You-Get 2 | ======= 3 | 4 | |PyPI version| |Build Status| |Gitter| 5 | 6 | `You-Get `__ is a tiny command-line utility to 7 | download media contents (videos, audios, images) from the Web, in case 8 | there is no other handy way to do it. 9 | 10 | Here's how you use ``you-get`` to download a video from `this web 11 | page `__: 12 | 13 | .. code:: console 14 | 15 | $ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society 16 | Site: fsf.org 17 | Title: TEDxGE2014_Stallman05_LQ 18 | Type: WebM video (video/webm) 19 | Size: 27.12 MiB (28435804 Bytes) 20 | 21 | Downloading TEDxGE2014_Stallman05_LQ.webm ... 22 | 100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1] 12 MB/s 23 | 24 | And here's why you might want to use it: 25 | 26 | - You enjoyed something on the Internet, and just want to download them 27 | for your own pleasure. 28 | - You watch your favorite videos online from your computer, but you are 29 | prohibited from saving them. You feel that you have no control over 30 | your own computer. (And it's not how an open Web is supposed to 31 | work.) 32 | - You want to get rid of any closed-source technology or proprietary 33 | JavaScript code, and disallow things like Flash running on your 34 | computer. 35 | - You are an adherent of hacker culture and free software. 36 | 37 | What ``you-get`` can do for you: 38 | 39 | - Download videos / audios from popular websites such as YouTube, 40 | Youku, Niconico, and a bunch more. (See the `full list of supported 41 | sites <#supported-sites>`__) 42 | - Stream an online video in your media player. No web browser, no more 43 | ads. 44 | - Download images (of interest) by scraping a web page. 45 | - Download arbitrary non-HTML contents, i.e., binary files. 46 | 47 | Interested? `Install it <#installation>`__ now and `get started by 48 | examples <#getting-started>`__. 49 | 50 | Are you a Python programmer? Then check out `the 51 | source `__ and fork it! 52 | 53 | .. |PyPI version| image:: https://badge.fury.io/py/you-get.png 54 | :target: http://badge.fury.io/py/you-get 55 | .. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg 56 | :target: https://github.com/soimort/you-get/actions 57 | .. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg 58 | :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 59 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | Please report security issues to . 6 | -------------------------------------------------------------------------------- /contrib/completion/_you-get: -------------------------------------------------------------------------------- 1 | #compdef you-get 2 | 3 | # Zsh completion definition for soimort/you-get. 
4 | 5 | setopt localoptions noshwordsplit noksharrays 6 | local -a args 7 | 8 | args=( 9 | '(- : *)'{-V,--version}'[print version and exit]' 10 | '(- : *)'{-h,--help}'[print help and exit]' 11 | '(-i --info)'{-i,--info}'[print extracted information]' 12 | '(-u --url)'{-u,--url}'[print extracted information with URLs]' 13 | '(--json)--json[print extracted URLs in JSON format]' 14 | '(-n --no-merge)'{-n,--no-merge}'[do not merge video parts]' 15 | '(--no-caption)--no-caption[do not download captions]' 16 | '(-f --force)'{-f,--force}'[force overwrite existing files]' 17 | '(-F --format)'{-F,--format}'[set video format to the specified stream id]:stream id' 18 | '(-O --output-filename)'{-O,--output-filename}'[set output filename]:filename:_files' 19 | '(-o --output-dir)'{-o,--output-dir}'[set output directory]:directory:_files -/' 20 | '(-p --player)'{-p,--player}'[stream extracted URL to the specified player]:player and options' 21 | '(-c --cookies)'{-c,--cookies}'[load cookies.txt or cookies.sqlite]:cookies file:_files' 22 | '(-x --http-proxy)'{-x,--http-proxy}'[use the specified HTTP proxy for downloading]:host\:port:' 23 | '(-y --extractor-proxy)'{-y,--extractor-proxy}'[use the specified HTTP proxy for extraction only]:host\:port' 24 | '(--no-proxy)--no-proxy[do not use a proxy]' 25 | '(-t --timeout)'{-t,--timeout}'[set socket timeout]:seconds' 26 | '(-d --debug)'{-d,--debug}'[show traceback and other debug info]' 27 | '*: :_guard "^-*" url' 28 | ) 29 | _arguments -S -s $args 30 | -------------------------------------------------------------------------------- /contrib/completion/you-get-completion.bash: -------------------------------------------------------------------------------- 1 | # Bash completion definition for you-get. 2 | 3 | _you-get () { 4 | COMPREPLY=() 5 | local IFS=$' \n' 6 | local cur=$2 prev=$3 7 | local -a opts_without_arg opts_with_arg 8 | opts_without_arg=( 9 | -V --version -h --help -i --info -u --url --json -n --no-merge 10 | --no-caption -f --force --no-proxy -d --debug 11 | ) 12 | opts_with_arg=( 13 | -F --format -O --output-filename -o --output-dir -p --player 14 | -c --cookies -x --http-proxy -y --extractor-proxy -t --timeout 15 | ) 16 | 17 | # Do not complete non option names 18 | [[ $cur == -* ]] || return 1 19 | 20 | # Do not complete when the previous arg is an option expecting an argument 21 | for opt in "${opts_with_arg[@]}"; do 22 | [[ $opt == $prev ]] && return 1 23 | done 24 | 25 | # Complete option names 26 | COMPREPLY=( $(compgen -W "${opts_without_arg[*]} ${opts_with_arg[*]}" \ 27 | -- "$cur") ) 28 | return 0 29 | } 30 | 31 | complete -F _you-get you-get 32 | -------------------------------------------------------------------------------- /contrib/completion/you-get.fish: -------------------------------------------------------------------------------- 1 | # Fish completion definition for you-get. 
2 | 3 | complete -c you-get -s V -l version -d 'print version and exit' 4 | complete -c you-get -s h -l help -d 'print help and exit' 5 | complete -c you-get -s i -l info -d 'print extracted information' 6 | complete -c you-get -s u -l url -d 'print extracted information' 7 | complete -c you-get -l json -d 'print extracted URLs in JSON format' 8 | complete -c you-get -s n -l no-merge -d 'do not merge video parts' 9 | complete -c you-get -l no-caption -d 'do not download captions' 10 | complete -c you-get -s f -l force -d 'force overwrite existing files' 11 | complete -c you-get -s F -l format -x -d 'set video format to the specified stream id' 12 | complete -c you-get -s O -l output-filename -d 'set output filename' \ 13 | -x -a '(__fish_complete_path (commandline -ct) "output filename")' 14 | complete -c you-get -s o -l output-dir -d 'set output directory' \ 15 | -x -a '(__fish_complete_directories (commandline -ct) "output directory")' 16 | complete -c you-get -s p -l player -x -d 'stream extracted URL to the specified player' 17 | complete -c you-get -s c -l cookies -d 'load cookies.txt or cookies.sqlite' \ 18 | -x -a '(__fish_complete_path (commandline -ct) "cookies.txt or cookies.sqlite")' 19 | complete -c you-get -s x -l http-proxy -x -d 'use the specified HTTP proxy for downloading' 20 | complete -c you-get -s y -l extractor-proxy -x -d 'use the specified HTTP proxy for extraction only' 21 | complete -c you-get -l no-proxy -d 'do not use a proxy' 22 | complete -c you-get -s t -l timeout -x -d 'set socket timeout' 23 | complete -c you-get -s d -l debug -d 'show traceback and other debug info' 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # runtime dependencies 2 | dukpy 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | PROJ_NAME = 'you-get' 4 | PACKAGE_NAME = 'you_get' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import importlib.util 9 | import importlib.machinery 10 | 11 | def load_source(modname, filename): 12 | loader = importlib.machinery.SourceFileLoader(modname, filename) 13 | spec = importlib.util.spec_from_file_location(modname, filename, loader=loader) 14 | module = importlib.util.module_from_spec(spec) 15 | # The module is always executed and not cached in sys.modules. 16 | # Uncomment the following line to cache the module. 
17 | # sys.modules[module.__name__] = module 18 | loader.exec_module(module) 19 | return module 20 | 21 | import os, json 22 | here = os.path.abspath(os.path.dirname(__file__)) 23 | proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) 24 | try: 25 | README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() 26 | except: 27 | README = "" 28 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() 29 | VERSION = load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ 30 | 31 | from setuptools import setup, find_packages 32 | setup( 33 | name = proj_info['name'], 34 | version = VERSION, 35 | 36 | author = proj_info['author'], 37 | author_email = proj_info['author_email'], 38 | url = proj_info['url'], 39 | license = proj_info['license'], 40 | 41 | description = proj_info['description'], 42 | keywords = proj_info['keywords'], 43 | 44 | long_description = README, 45 | 46 | packages = find_packages('src'), 47 | package_dir = {'' : 'src'}, 48 | 49 | test_suite = 'tests', 50 | 51 | platforms = 'any', 52 | zip_safe = True, 53 | include_package_data = True, 54 | 55 | classifiers = proj_info['classifiers'], 56 | 57 | entry_points = {'console_scripts': proj_info['console_scripts']}, 58 | 59 | install_requires = ['dukpy'], 60 | extras_require = { 61 | 'socks': ['PySocks'], 62 | } 63 | ) 64 | -------------------------------------------------------------------------------- /src/you_get/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 3 | 4 | import sys 5 | 6 | if sys.version_info[0] == 3: 7 | #from .extractor import Extractor, VideoExtractor 8 | #from .util import log 9 | 10 | from .__main__ import * 11 | 12 | #from .common import * 13 | #from .version import * 14 | #from .cli_wrapper import * 15 | #from .extractor import * 16 | else: 17 | # Don't import anything. 18 | pass 19 | -------------------------------------------------------------------------------- /src/you_get/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import getopt 4 | import os 5 | import platform 6 | import sys 7 | from .version import script_name, __version__ 8 | from .util import git, log 9 | 10 | _options = [ 11 | 'help', 12 | 'version', 13 | 'gui', 14 | 'force', 15 | 'playlists', 16 | ] 17 | _short_options = 'hVgfl' 18 | 19 | _help = """Usage: {} [OPTION]... [URL]... 20 | TODO 21 | """.format(script_name) 22 | 23 | # TBD 24 | def main_dev(**kwargs): 25 | """Main entry point. 26 | you-get-dev 27 | """ 28 | 29 | # Get (branch, commit) if running from a git repo. 30 | head = git.get_head(kwargs['repo_path']) 31 | 32 | # Get options and arguments. 33 | try: 34 | opts, args = getopt.getopt(sys.argv[1:], _short_options, _options) 35 | except getopt.GetoptError as e: 36 | log.wtf(""" 37 | [Fatal] {}. 38 | Try '{} --help' for more options.""".format(e, script_name)) 39 | 40 | if not opts and not args: 41 | # Display help. 42 | print(_help) 43 | # Enter GUI mode. 44 | #from .gui import gui_main 45 | #gui_main() 46 | else: 47 | conf = {} 48 | for opt, arg in opts: 49 | if opt in ('-h', '--help'): 50 | # Display help. 51 | print(_help) 52 | 53 | elif opt in ('-V', '--version'): 54 | # Display version. 
55 | log.println("you-get:", log.BOLD) 56 | log.println(" version: {}".format(__version__)) 57 | if head is not None: 58 | log.println(" branch: {}\n commit: {}".format(*head)) 59 | else: 60 | log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__))) 61 | 62 | log.println(" platform: {}".format(platform.platform())) 63 | log.println(" python: {}".format(sys.version.split('\n')[0])) 64 | 65 | elif opt in ('-g', '--gui'): 66 | # Run using GUI. 67 | conf['gui'] = True 68 | 69 | elif opt in ('-f', '--force'): 70 | # Force download. 71 | conf['force'] = True 72 | 73 | elif opt in ('-l', '--playlist', '--playlists'): 74 | # Download playlist whenever possible. 75 | conf['playlist'] = True 76 | 77 | if args: 78 | if 'gui' in conf and conf['gui']: 79 | # Enter GUI mode. 80 | from .gui import gui_main 81 | gui_main(*args, **conf) 82 | else: 83 | # Enter console mode. 84 | from .console import console_main 85 | console_main(*args, **conf) 86 | 87 | def main(**kwargs): 88 | """Main entry point. 89 | you-get (legacy) 90 | """ 91 | from .common import main 92 | main(**kwargs) 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/downloader/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/openssl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/openssl/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .mplayer import * 4 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' WIP 4 | def main(): 5 | script_main('you-get', any_download, any_download_playlist) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | ''' 10 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/dragonplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/dragonplayer.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/gnome_mplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/gnome_mplayer.py 
-------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/mplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/mplayer.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/vlc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/wmp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/wmp.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/ffmpeg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/ffmpeg.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/libav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/libav.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/mencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/mencoder.py -------------------------------------------------------------------------------- /src/you_get/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .acfun import * 4 | from .alive import * 5 | from .archive import * 6 | from .baidu import * 7 | from .bandcamp import * 8 | from .bigthink import * 9 | from .bilibili import * 10 | from .bokecc import * 11 | from .cbs import * 12 | from .ckplayer import * 13 | from .cntv import * 14 | from .coub import * 15 | from .dailymotion import * 16 | from .douban import * 17 | from .douyin import * 18 | from .douyutv import * 19 | from .ehow import * 20 | from .facebook import * 21 | from .fc2video import * 22 | from .flickr import * 23 | from .freesound import * 24 | from .funshion import * 25 | from .google import * 26 | from .heavymusic import * 27 | from .icourses import * 28 | from .ifeng import * 29 | from .imgur import * 30 | from .infoq import * 31 | from .instagram import * 32 | from .interest import * 33 | from .iqilu import * 34 | from .iqiyi import * 35 | from .joy import * 36 | from .khan import * 37 | from .ku6 import * 38 | from .kakao import * 39 | from .kuaishou import * 40 | from .kugou import * 41 | 
from .kuwo import * 42 | from .le import * 43 | from .lizhi import * 44 | from .longzhu import * 45 | from .magisto import * 46 | from .metacafe import * 47 | from .mgtv import * 48 | from .miaopai import * 49 | from .miomio import * 50 | from .mixcloud import * 51 | from .mtv81 import * 52 | from .nanagogo import * 53 | from .naver import * 54 | from .netease import * 55 | from .nicovideo import * 56 | from .pinterest import * 57 | from .pixnet import * 58 | from .pptv import * 59 | from .qie import * 60 | from .qingting import * 61 | from .qq import * 62 | from .showroom import * 63 | from .sina import * 64 | from .sohu import * 65 | from .soundcloud import * 66 | from .suntv import * 67 | from .ted import * 68 | from .theplatform import * 69 | from .tiktok import * 70 | from .tucao import * 71 | from .tudou import * 72 | from .tumblr import * 73 | from .twitter import * 74 | from .ucas import * 75 | from .veoh import * 76 | from .vimeo import * 77 | from .vk import * 78 | from .w56 import * 79 | from .wanmen import * 80 | from .xinpianchang import * 81 | from .yixia import * 82 | from .youku import * 83 | from .youtube import * 84 | from .zhanqi import * 85 | from .zhibo import * 86 | from .zhihu import * 87 | -------------------------------------------------------------------------------- /src/you_get/extractors/alive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['alive_download'] 4 | 5 | from ..common import * 6 | 7 | def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | html = get_html(url) 9 | 10 | title = r1(r'(.*)', html) 37 | assert title 38 | id = r1(r'flvid\s*=\s*(\d+)', html) 39 | assert id 40 | baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) 41 | 42 | site_info = "baomihua.com" 43 | download = baomihua_download 44 | download_playlist = playlist_not_supported('baomihua') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/bigthink.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import json 7 | 8 | class Bigthink(VideoExtractor): 9 | name = "Bigthink" 10 | 11 | stream_types = [ #this is just a sample. Will make it in prepare() 12 | # {'id': '1080'}, 13 | # {'id': '720'}, 14 | # {'id': '360'}, 15 | # {'id': '288'}, 16 | # {'id': '190'}, 17 | # {'id': '180'}, 18 | 19 | ] 20 | 21 | @staticmethod 22 | def get_streams_by_id(account_number, video_id): 23 | """ 24 | int, int->list 25 | 26 | Get the height of the videos. 27 | 28 | Since brightcove is using 3 kinds of links: rtmp, http and https, 29 | we will be using the HTTPS one to make it secure. 30 | 31 | If somehow akamaihd.net is blocked by the Great Fucking Wall, 32 | change the "startswith https" to http. 33 | """ 34 | endpoint = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_number}/videos/{video_id}'.format(account_number = account_number, video_id = video_id) 35 | fake_header_id = fake_headers 36 | #is this somehow related to the time? Magic.... 
37 | fake_header_id['Accept'] ='application/json;pk=BCpkADawqM1cc6wmJQC2tvoXZt4mrB7bFfi6zGt9QnOzprPZcGLE9OMGJwspQwKfuFYuCjAAJ53JdjI8zGFx1ll4rxhYJ255AXH1BQ10rnm34weknpfG-sippyQ' 38 | 39 | html = get_content(endpoint, headers= fake_header_id) 40 | html_json = json.loads(html) 41 | 42 | link_list = [] 43 | 44 | for i in html_json['sources']: 45 | if 'src' in i: #to avoid KeyError 46 | if i['src'].startswith('https'): 47 | link_list.append((str(i['height']), i['src'])) 48 | 49 | return link_list 50 | 51 | def prepare(self, **kwargs): 52 | 53 | html = get_content(self.url) 54 | 55 | self.title = match1(html, r'None 19 | 20 | Keyword arguments: 21 | self: self 22 | vid: The video ID for BokeCC cloud, something like 23 | FE3BB999594978049C33DC5901307461 24 | 25 | Calls the prepare() to download the video. 26 | 27 | If no title is provided, this method shall try to find a proper title 28 | with the information providin within the 29 | returned content of the API.""" 30 | 31 | assert vid 32 | 33 | self.prepare(vid = vid, title = title, **kwargs) 34 | 35 | self.extract(**kwargs) 36 | 37 | self.download(output_dir = output_dir, 38 | merge = merge, 39 | info_only = info_only, **kwargs) 40 | 41 | def prepare(self, vid = '', title = None, **kwargs): 42 | assert vid 43 | 44 | api_url = self.API_ENDPOINT + \ 45 | 'servlet/playinfo?vid={vid}&m=0'.format(vid = vid) #return XML 46 | 47 | html = get_content(api_url) 48 | self.tree = ET.ElementTree(ET.fromstring(html)) 49 | 50 | if self.tree.find('result').text != '1': 51 | log.wtf('API result says failed!') 52 | raise 53 | 54 | if title is None: 55 | self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')]) 56 | else: 57 | self.title = title 58 | 59 | if not title: 60 | self.title = vid 61 | 62 | for i in self.tree.iterfind('video/quality'): 63 | quality = i.attrib ['value'] 64 | url = i[0].attrib['playurl'] 65 | self.stream_types.append({'id': quality, 66 | 'video_profile': i.attrib ['desp']}) 67 | self.streams[quality] = {'url': url, 68 | 'video_profile': i.attrib ['desp']} 69 | self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] 70 | 71 | 72 | def extract(self, **kwargs): 73 | for i in self.streams: 74 | s = self.streams[i] 75 | _, s['container'], s['size'] = url_info(s['url']) 76 | s['src'] = [s['url']] 77 | if 'stream_id' in kwargs and kwargs['stream_id']: 78 | # Extract the stream 79 | stream_id = kwargs['stream_id'] 80 | 81 | if stream_id not in self.streams: 82 | log.e('[Error] Invalid video format.') 83 | log.e('Run \'-i\' command with no specific video format to view all available formats.') 84 | exit(2) 85 | else: 86 | # Extract stream with the best quality 87 | stream_id = self.streams_sorted[0]['id'] 88 | _, s['container'], s['size'] = url_info(s['url']) 89 | s['src'] = [s['url']] 90 | 91 | site = BokeCC() 92 | 93 | # I don't know how to call the player directly so I just put it here 94 | # just in case anyone touchs it -- Beining@Aug.24.2016 95 | #download = site.download_by_url 96 | #download_playlist = site.download_by_url 97 | 98 | bokecc_download_by_id = site.download_by_id 99 | -------------------------------------------------------------------------------- /src/you_get/extractors/cbs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['cbs_download'] 4 | 5 | from ..common import * 6 | 7 
| from .theplatform import theplatform_download_by_pid 8 | 9 | def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | """Downloads CBS videos by URL. 11 | """ 12 | 13 | html = get_content(url) 14 | pid = match1(html, r'video\.settings\.pid\s*=\s*\'([^\']+)\'') 15 | title = match1(html, r'video\.settings\.title\s*=\s*\"([^\"]+)\"') 16 | 17 | theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only) 18 | 19 | site_info = "CBS.com" 20 | download = cbs_download 21 | download_playlist = playlist_not_supported('cbs') 22 | -------------------------------------------------------------------------------- /src/you_get/extractors/ckplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding:utf-8 3 | # Author: Beining -- 4 | # Purpose: A general extractor for CKPlayer 5 | # Created: 03/15/2016 6 | 7 | __all__ = ['ckplayer_download'] 8 | 9 | from xml.etree import ElementTree as ET 10 | from copy import copy 11 | from ..common import * 12 | #---------------------------------------------------------------------- 13 | def ckplayer_get_info_by_xml(ckinfo): 14 | """str->dict 15 | Information for CKPlayer API content.""" 16 | e = ET.XML(ckinfo) 17 | video_dict = {'title': '', 18 | #'duration': 0, 19 | 'links': [], 20 | 'size': 0, 21 | 'flashvars': '',} 22 | dictified = dictify(e)['ckplayer'] 23 | if 'info' in dictified: 24 | if '_text' in dictified['info'][0]['title'][0]: #title 25 | video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip() 26 | 27 | #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip(): #duration 28 | #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip() 29 | 30 | if '_text' in dictified['video'][0]['size'][0]: #size exists for 1 piece 31 | video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']]) 32 | 33 | if '_text' in dictified['video'][0]['file'][0]: #link exist 34 | video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']] 35 | 36 | if '_text' in dictified['flashvars'][0]: 37 | video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip() 38 | 39 | return video_dict 40 | 41 | #---------------------------------------------------------------------- 42 | #helper 43 | #https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary-in-python 44 | def dictify(r,root=True): 45 | if root: 46 | return {r.tag : dictify(r, False)} 47 | d=copy(r.attrib) 48 | if r.text: 49 | d["_text"]=r.text 50 | for x in r.findall("./*"): 51 | if x.tag not in d: 52 | d[x.tag]=[] 53 | d[x.tag].append(dictify(x,False)) 54 | return d 55 | 56 | #---------------------------------------------------------------------- 57 | def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs): 58 | #Info XML 59 | video_info = ckplayer_get_info_by_xml(ckinfo) 60 | 61 | try: 62 | title = kwargs['title'] 63 | except: 64 | title = '' 65 | type_ = '' 66 | size = 0 67 | 68 | if len(video_info['links']) > 0: #has link 69 | type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext 70 | 71 | if 'size' in video_info: 72 | size = int(video_info['size']) 73 | else: 74 | for i in video_info['links'][1:]: #save 1st one 75 | size += url_info(i)[2] 76 | 77 | print_info(site_info, title, type_, size) 78 | if not info_only: 79 | download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, 
merge=merge) 80 | 81 | #---------------------------------------------------------------------- 82 | def ckplayer_download(url, output_dir = '.', merge = False, info_only = False, is_xml = True, **kwargs): 83 | if is_xml: #URL is XML URL 84 | try: 85 | title = kwargs['title'] 86 | except: 87 | title = '' 88 | try: 89 | headers = kwargs['headers'] #headers provided 90 | ckinfo = get_content(url, headers = headers) 91 | except NameError: 92 | ckinfo = get_content(url) 93 | 94 | ckplayer_download_by_xml(ckinfo, output_dir, merge, 95 | info_only, title = title) 96 | 97 | site_info = "CKPlayer General" 98 | download = ckplayer_download 99 | download_playlist = playlist_not_supported('ckplayer') 100 | -------------------------------------------------------------------------------- /src/you_get/extractors/cntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import re 5 | 6 | from ..common import get_content, r1, match1, playlist_not_supported 7 | from ..extractor import VideoExtractor 8 | 9 | __all__ = ['cntv_download', 'cntv_download_by_id'] 10 | 11 | 12 | class CNTV(VideoExtractor): 13 | name = 'CNTV.com' 14 | stream_types = [ 15 | {'id': '1', 'video_profile': '1280x720_2000kb/s', 'map_to': 'chapters4'}, 16 | {'id': '2', 'video_profile': '1280x720_1200kb/s', 'map_to': 'chapters3'}, 17 | {'id': '3', 'video_profile': '640x360_850kb/s', 'map_to': 'chapters2'}, 18 | {'id': '4', 'video_profile': '480x270_450kb/s', 'map_to': 'chapters'}, 19 | {'id': '5', 'video_profile': '320x180_200kb/s', 'map_to': 'lowChapters'}, 20 | ] 21 | 22 | ep = 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={}' 23 | 24 | def __init__(self): 25 | super().__init__() 26 | self.api_data = None 27 | 28 | def prepare(self, **kwargs): 29 | self.api_data = json.loads(get_content(self.__class__.ep.format(self.vid))) 30 | self.title = self.api_data['title'] 31 | for s in self.api_data['video']: 32 | for st in self.__class__.stream_types: 33 | if st['map_to'] == s: 34 | urls = self.api_data['video'][s] 35 | src = [u['url'] for u in urls] 36 | stream_data = dict(src=src, size=0, container='mp4', video_profile=st['video_profile']) 37 | self.streams[st['id']] = stream_data 38 | 39 | 40 | def cntv_download_by_id(rid, **kwargs): 41 | CNTV().download_by_vid(rid, **kwargs) 42 | 43 | 44 | def cntv_download(url, **kwargs): 45 | if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url): 46 | rid = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)') 47 | elif re.match(r'http(s)?://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url): 48 | rid = r1(r'var guid = "(\w+)"', get_content(url)) 49 | elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \ 50 | re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ 51 | re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \ 52 | re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \ 53 | re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): 54 | page = get_content(url) 55 | rid = r1(r'videoCenterId","(\w+)"', page) 56 | if rid is None: 57 | guid = re.search(r'guid\s*=\s*"([0-9a-z]+)"', page).group(1) 58 | rid = guid 59 | elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): 60 | rid = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) 61 | else: 62 | raise NotImplementedError(url) 63 | 64 | CNTV().download_by_vid(rid, **kwargs) 65 | 66 | site_info = "CNTV.com" 67 | download = cntv_download 68 | download_playlist = 
playlist_not_supported('cntv') 69 | -------------------------------------------------------------------------------- /src/you_get/extractors/coub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['coub_download'] 4 | 5 | from ..common import * 6 | from ..processor import ffmpeg 7 | from ..util.fs import legitimize 8 | 9 | 10 | def coub_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 11 | html = get_content(url) 12 | 13 | try: 14 | json_data = get_coub_data(html) 15 | title, video_url, audio_url = get_title_and_urls(json_data) 16 | video_file_name, video_file_path = get_file_path(merge, output_dir, title, video_url) 17 | audio_file_name, audio_file_path = get_file_path(merge, output_dir, title, audio_url) 18 | download_url(audio_url, merge, output_dir, title, info_only) 19 | download_url(video_url, merge, output_dir, title, info_only) 20 | if not info_only: 21 | try: 22 | fix_coub_video_file(video_file_path) 23 | audio_duration = float(ffmpeg.ffprobe_get_media_duration(audio_file_path)) 24 | video_duration = float(ffmpeg.ffprobe_get_media_duration(video_file_path)) 25 | loop_file_path = get_loop_file_path(title, output_dir) 26 | single_file_path = audio_file_path 27 | if audio_duration > video_duration: 28 | write_loop_file(round(audio_duration / video_duration), loop_file_path, video_file_name) 29 | else: 30 | single_file_path = audio_file_path 31 | write_loop_file(round(video_duration / audio_duration), loop_file_path, audio_file_name) 32 | 33 | ffmpeg.ffmpeg_concat_audio_and_video([loop_file_path, single_file_path], title + "_full", "mp4") 34 | cleanup_files([video_file_path, audio_file_path, loop_file_path]) 35 | except EnvironmentError as err: 36 | print("Error preparing full coub video. {}".format(err)) 37 | except Exception as err: 38 | print("Error while downloading files. 
{}".format(err)) 39 | 40 | 41 | def write_loop_file(records_number, loop_file_path, file_name): 42 | with open(loop_file_path, 'a') as file: 43 | for i in range(records_number): 44 | file.write("file '{}'\n".format(file_name)) 45 | 46 | 47 | def download_url(url, merge, output_dir, title, info_only): 48 | mime, ext, size = url_info(url) 49 | print_info(site_info, title, mime, size) 50 | if not info_only: 51 | download_urls([url], title, ext, size, output_dir, merge=merge) 52 | 53 | 54 | def fix_coub_video_file(file_path): 55 | with open(file_path, 'r+b') as file: 56 | file.seek(0) 57 | file.write(bytes(2)) 58 | 59 | 60 | def get_title_and_urls(json_data): 61 | title = legitimize(re.sub(r'[\s*]', "_", json_data['title'])) 62 | video_info = json_data['file_versions']['html5']['video'] 63 | if 'high' not in video_info: 64 | if 'med' not in video_info: 65 | video_url = video_info['low']['url'] 66 | else: 67 | video_url = video_info['med']['url'] 68 | else: 69 | video_url = video_info['high']['url'] 70 | audio_info = json_data['file_versions']['html5']['audio'] 71 | if 'high' not in audio_info: 72 | if 'med' not in audio_info: 73 | audio_url = audio_info['low']['url'] 74 | else: 75 | audio_url = audio_info['med']['url'] 76 | else: 77 | audio_url = audio_info['high']['url'] 78 | return title, video_url, audio_url 79 | 80 | 81 | def get_coub_data(html): 82 | coub_data = r1(r'))', html) 83 | json_data = json.loads(coub_data) 84 | return json_data 85 | 86 | 87 | def get_file_path(merge, output_dir, title, url): 88 | mime, ext, size = url_info(url) 89 | file_name = get_output_filename([], title, ext, output_dir, merge) 90 | file_path = os.path.join(output_dir, file_name) 91 | return file_name, file_path 92 | 93 | 94 | def get_loop_file_path(title, output_dir): 95 | return os.path.join(output_dir, get_output_filename([], title, "txt", None, False)) 96 | 97 | 98 | def cleanup_files(files): 99 | for file in files: 100 | os.remove(file) 101 | 102 | 103 | site_info = "coub.com" 104 | download = coub_download 105 | download_playlist = playlist_not_supported('coub') 106 | -------------------------------------------------------------------------------- /src/you_get/extractors/dailymotion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['dailymotion_download'] 4 | 5 | from ..common import * 6 | import urllib.parse 7 | 8 | def rebuilt_url(url): 9 | path = urllib.parse.urlparse(url).path 10 | aid = path.split('/')[-1].split('_')[0] 11 | return 'http://www.dailymotion.com/embed/video/{}?autoplay=1'.format(aid) 12 | 13 | def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 14 | """Downloads Dailymotion videos by URL. 
15 | """ 16 | 17 | html = get_content(rebuilt_url(url)) 18 | info = json.loads(match1(html, r'qualities":({.+?}),"')) 19 | title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ 20 | match1(html, r'"title"\s*:\s*"([^"]+)"') 21 | title = unicodize(title) 22 | 23 | for quality in ['1080','720','480','380','240','144','auto']: 24 | try: 25 | real_url = info[quality][1]["url"] 26 | if real_url: 27 | break 28 | except KeyError: 29 | pass 30 | 31 | mime, ext, size = url_info(real_url) 32 | 33 | print_info(site_info, title, mime, size) 34 | if not info_only: 35 | download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge) 36 | 37 | site_info = "Dailymotion.com" 38 | download = dailymotion_download 39 | download_playlist = playlist_not_supported('dailymotion') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/douban.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['douban_download'] 4 | 5 | import urllib.request, urllib.parse 6 | from ..common import * 7 | 8 | def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | html = get_html(url) 10 | 11 | if re.match(r'https?://movie', url): 12 | title = match1(html, 'name="description" content="([^"]+)') 13 | tid = match1(url, r'trailer/(\d+)') 14 | real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid 15 | type, ext, size = url_info(real_url) 16 | 17 | print_info(site_info, title, type, size) 18 | if not info_only: 19 | download_urls([real_url], title, ext, size, output_dir, merge = merge) 20 | 21 | elif 'subject' in url: 22 | titles = re.findall(r'data-title="([^"]*)">', html) 23 | song_id = re.findall(r'
  • (.+?)' 20 | 21 | hit = re.search(patt, url) 22 | if hit is None: 23 | log.wtf('Unknown url pattern') 24 | vid = hit.group(1) 25 | 26 | page = get_content(url, headers=headers) 27 | hit = re.search(title_patt, page) 28 | if hit is None: 29 | title = vid 30 | else: 31 | title = hit.group(1) 32 | 33 | meta = json.loads(get_content(ep + vid)) 34 | if meta['error'] != 0: 35 | log.wtf('Error from API server') 36 | m3u8_url = meta['data']['video_url'] 37 | print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url) 38 | if not info_only: 39 | urls = general_m3u8_extractor(m3u8_url) 40 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 41 | 42 | 43 | def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 44 | if 'v.douyu.com/show/' in url: 45 | douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 46 | return 47 | 48 | url = re.sub(r'.*douyu.com','https://m.douyu.com/room', url) 49 | html = get_content(url, headers) 50 | room_id_patt = r'"rid"\s*:\s*(\d+),' 51 | room_id = match1(html, room_id_patt) 52 | if room_id == "0": 53 | room_id = url[url.rfind('/') + 1:] 54 | 55 | api_url = "http://www.douyutv.com/api/v1/" 56 | args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) 57 | auth_md5 = (args + "zNzMV1y4EMxOHS6I5WKm").encode("utf-8") 58 | auth_str = hashlib.md5(auth_md5).hexdigest() 59 | json_request_url = "%s%s&auth=%s" % (api_url, args, auth_str) 60 | 61 | content = get_content(json_request_url, headers) 62 | json_content = json.loads(content) 63 | data = json_content['data'] 64 | server_status = json_content.get('error', 0) 65 | if server_status != 0: 66 | raise ValueError("Server returned error:%s" % server_status) 67 | 68 | title = data.get('room_name') 69 | show_status = data.get('show_status') 70 | if show_status != "1": 71 | raise ValueError("The live stream is not online! 
(Errno:%s)" % server_status) 72 | 73 | real_url = data.get('rtmp_url') + '/' + data.get('rtmp_live') 74 | 75 | print_info(site_info, title, 'flv', float('inf')) 76 | if not info_only: 77 | download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) 78 | 79 | 80 | site_info = "douyu.com" 81 | download = douyutv_download 82 | download_playlist = playlist_not_supported('douyu') 83 | -------------------------------------------------------------------------------- /src/you_get/extractors/ehow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ehow_download'] 4 | 5 | from ..common import * 6 | 7 | def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | 9 | assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported" 10 | 11 | html = get_html(url) 12 | contentid = r1(r'', html) 13 | vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) 14 | assert vid 15 | 16 | xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) 17 | 18 | from xml.dom.minidom import parseString 19 | doc = parseString(xml) 20 | tab = doc.getElementsByTagName('related')[0].firstChild 21 | 22 | for video in tab.childNodes: 23 | if re.search(contentid, video.attributes['link'].value): 24 | url = video.attributes['flv'].value 25 | break 26 | 27 | title = video.attributes['title'].value 28 | assert title 29 | 30 | type, ext, size = url_info(url) 31 | print_info(site_info, title, type, size) 32 | 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir, merge = merge) 35 | 36 | site_info = "ehow.com" 37 | download = ehow_download 38 | download_playlist = playlist_not_supported('ehow') 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/facebook.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['facebook_download'] 4 | 5 | from ..common import * 6 | 7 | def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | url = re.sub(r'//.*?facebook.com','//facebook.com',url) 9 | html = get_html(url) 10 | 11 | title = r1(r'(.+)', html) 12 | 13 | if title is None: 14 | title = url 15 | 16 | sd_urls = list(set([ 17 | unicodize(str.replace(i, '\\/', '/')) 18 | for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html) 19 | ])) 20 | hd_urls = list(set([ 21 | unicodize(str.replace(i, '\\/', '/')) 22 | for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html) 23 | ])) 24 | urls = hd_urls if hd_urls else sd_urls 25 | 26 | type, ext, size = url_info(urls[0], True) 27 | size = urls_size(urls) 28 | 29 | print_info(site_info, title, type, size) 30 | if not info_only: 31 | download_urls(urls, title, ext, size, output_dir, merge=False) 32 | 33 | site_info = "Facebook.com" 34 | download = facebook_download 35 | download_playlist = playlist_not_supported('facebook') 36 | -------------------------------------------------------------------------------- /src/you_get/extractors/fc2video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['fc2video_download'] 4 | 5 | from ..common import * 6 | from hashlib import md5 7 | from urllib.parse import urlparse 8 | 9 | #---------------------------------------------------------------------- 10 | def makeMimi(upid): 11 | """From 
http://cdn37.atwikiimg.com/sitescript/pub/dksitescript/FC2.site.js 12 | Also com.hps.util.fc2.FC2EncrptUtil.makeMimiLocal 13 | L110""" 14 | strSeed = "gGddgPfeaf_gzyr" 15 | prehash = upid + "_" + strSeed 16 | return md5(prehash.encode('utf-8')).hexdigest() 17 | 18 | #---------------------------------------------------------------------- 19 | def fc2video_download_by_upid(upid, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | """""" 21 | fake_headers = { 22 | 'DNT': '1', 23 | 'Accept-Encoding': 'gzip, deflate, sdch', 24 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 25 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.58 Safari/537.36', 26 | 'Accept': '*/*', 27 | 'X-Requested-With': 'ShockwaveFlash/19.0.0.245', 28 | 'Connection': 'keep-alive', 29 | } 30 | api_base = 'http://video.fc2.com/ginfo.php?upid={upid}&mimi={mimi}'.format(upid = upid, mimi = makeMimi(upid)) 31 | html = get_content(api_base, headers=fake_headers) 32 | 33 | video_url = match1(html, r'filepath=(.+)&sec') 34 | video_url = video_url.replace('&mid', '?mid') 35 | 36 | title = match1(html, r'&title=([^&]+)') 37 | 38 | type, ext, size = url_info(video_url, headers=fake_headers) 39 | 40 | print_info(site_info, title, type, size) 41 | if not info_only: 42 | download_urls([video_url], title, ext, size, output_dir, merge=merge, headers = fake_headers) 43 | 44 | #---------------------------------------------------------------------- 45 | def fc2video_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 46 | """wrapper""" 47 | #'http://video.fc2.com/en/content/20151021bTVKnbEw' 48 | #'http://xiaojiadianvideo.asia/content/20151021bTVKnbEw' 49 | #'http://video.fc2.com/ja/content/20151021bTVKnbEw' 50 | #'http://video.fc2.com/tw/content/20151021bTVKnbEw' 51 | hostname = urlparse(url).hostname 52 | if not ('fc2.com' in hostname or 'xiaojiadianvideo.asia' in hostname): 53 | return False 54 | upid = match1(url, r'.+/content/(\w+)') 55 | 56 | fc2video_download_by_upid(upid, output_dir, merge, info_only) 57 | 58 | site_info = "FC2Video" 59 | download = fc2video_download 60 | download_playlist = playlist_not_supported('fc2video') 61 | -------------------------------------------------------------------------------- /src/you_get/extractors/freesound.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['freesound_download'] 4 | 5 | from ..common import * 6 | 7 | def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | page = get_html(url) 9 | 10 | title = r1(r'', html) 13 | ])) 14 | 15 | title = r1(r'', html) 16 | 17 | if title is None: 18 | title = url[0] 19 | 20 | type, ext, size = url_info(url[0], True) 21 | size = urls_size(url) 22 | 23 | type = "video/mp4" 24 | ext = "mp4" 25 | 26 | print_info(site_info, title, type, size) 27 | if not info_only: 28 | download_urls(url, title, ext, size, output_dir, merge=False) 29 | 30 | site_info = "Giphy.com" 31 | download = giphy_download 32 | download_playlist = playlist_not_supported('giphy') 33 | -------------------------------------------------------------------------------- /src/you_get/extractors/heavymusic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['heavymusic_download'] 4 | 5 | from ..common import * 6 | 7 | def heavymusic_download(url, output_dir='.', merge=True, 
info_only=False, **kwargs): 8 | html = get_html(url) 9 | tracks = re.findall(r'href="(online2\.php[^"]+)"', html) 10 | for track in tracks: 11 | band = r1(r'band=([^&]*)', track) 12 | album = r1(r'album=([^&]*)', track) 13 | title = r1(r'track=([^&]*)', track) 14 | file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) 15 | _, _, size = url_info(file_url) 16 | 17 | print_info(site_info, title, 'mp3', size) 18 | if not info_only: 19 | download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) 20 | 21 | site_info = "heavy-music.ru" 22 | download = heavymusic_download 23 | download_playlist = heavymusic_download 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/huomaotv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['huomaotv_download'] 4 | 5 | from ..common import * 6 | 7 | 8 | def get_mobile_room_url(room_id): 9 | return 'http://www.huomao.com/mobile/mob_live/%s' % room_id 10 | 11 | 12 | def get_m3u8_url(stream_id): 13 | return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id 14 | 15 | 16 | def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 17 | room_id_pattern = r'huomao.com/(\d+)' 18 | room_id = match1(url, room_id_pattern) 19 | html = get_content(get_mobile_room_url(room_id)) 20 | 21 | stream_id_pattern = r'id="html_stream" value="(\w+)"' 22 | stream_id = match1(html, stream_id_pattern) 23 | 24 | m3u8_url = get_m3u8_url(stream_id) 25 | 26 | title = match1(html, r'([^<]{1,9999})') 27 | 28 | print_info(site_info, title, 'm3u8', float('inf')) 29 | 30 | if not info_only: 31 | download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) 32 | 33 | 34 | site_info = 'huomao.com' 35 | download = huomaotv_download 36 | download_playlist = playlist_not_supported('huomao') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/ifeng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ifeng_download', 'ifeng_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 8 | assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id 9 | url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id) 10 | xml = get_html(url, 'utf-8') 11 | title = r1(r'Name="([^"]+)"', xml) 12 | title = unescape_html(title) 13 | url = r1(r'VideoPlayUrl="([^"]+)"', xml) 14 | from random import randint 15 | r = randint(10, 19) 16 | url = url.replace('http://wideo.ifeng.com/', 'http://ips.ifeng.com/wideo.ifeng.com/') 17 | type, ext, size = url_info(url) 18 | 19 | print_info(site_info, title, ext, size) 20 | if not info_only: 21 | download_urls([url], title, ext, size, output_dir, merge = merge) 22 | 23 | def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 24 | # old pattern /uuid.shtml 25 | # now it could be #uuid 26 | id = r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', url) 27 | if id: 28 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 29 | 30 | html = get_content(url) 31 | uuid_pattern = 
r'"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"' 32 | id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', html) 33 | if id is None: 34 | video_pattern = r'"vid"\s*:\s*' + uuid_pattern 35 | id = match1(html, video_pattern) 36 | assert id, "can't find video info" 37 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 38 | 39 | site_info = "ifeng.com" 40 | download = ifeng_download 41 | download_playlist = playlist_not_supported('ifeng') 42 | -------------------------------------------------------------------------------- /src/you_get/extractors/imgur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | from .universal import * 6 | 7 | class Imgur(VideoExtractor): 8 | name = "Imgur" 9 | 10 | stream_types = [ 11 | {'id': 'original'}, 12 | {'id': 'thumbnail'}, 13 | ] 14 | 15 | def prepare(self, **kwargs): 16 | self.ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/123.0.2420.97' 17 | 18 | if re.search(r'imgur\.com/a/', self.url): 19 | # album 20 | content = get_content(self.url, headers=fake_headers) 21 | album = match1(content, r'album\s*:\s*({.*}),') or \ 22 | match1(content, r'image\s*:\s*({.*}),') 23 | album = json.loads(album) 24 | count = album['album_images']['count'] 25 | images = album['album_images']['images'] 26 | ext = images[0]['ext'] 27 | self.streams = { 28 | 'original': { 29 | 'src': ['http://i.imgur.com/%s%s' % (i['hash'], ext) 30 | for i in images], 31 | 'size': sum([i['size'] for i in images]), 32 | 'container': ext[1:] 33 | }, 34 | 'thumbnail': { 35 | 'src': ['http://i.imgur.com/%ss%s' % (i['hash'], '.jpg') 36 | for i in images], 37 | 'container': 'jpg' 38 | } 39 | } 40 | self.title = album['title'] 41 | 42 | elif re.search(r'i\.imgur\.com/', self.url): 43 | # direct image 44 | _, container, size = url_info(self.url, faker=True) 45 | self.streams = { 46 | 'original': { 47 | 'src': [self.url], 48 | 'size': size, 49 | 'container': container 50 | } 51 | } 52 | self.title = r1(r'i\.imgur\.com/([^./]*)', self.url) 53 | 54 | else: 55 | # gallery image 56 | content = get_content(self.url, headers=fake_headers) 57 | url = match1(content, r'meta property="og:video"[^>]+(https?://i.imgur.com/[^"?]+)') or \ 58 | match1(content, r'meta property="og:image"[^>]+(https?://i.imgur.com/[^"?]+)') 59 | _, container, size = url_info(url, headers={'User-Agent': fake_headers['User-Agent']}) 60 | self.streams = { 61 | 'original': { 62 | 'src': [url], 63 | 'size': size, 64 | 'container': container 65 | } 66 | } 67 | self.title = r1(r'i\.imgur\.com/([^./]*)', url) 68 | 69 | def extract(self, **kwargs): 70 | if 'stream_id' in kwargs and kwargs['stream_id']: 71 | i = kwargs['stream_id'] 72 | if 'size' not in self.streams[i]: 73 | self.streams[i]['size'] = urls_size(self.streams[i]['src']) 74 | 75 | site = Imgur() 76 | download = site.download_by_url 77 | download_playlist = site.download_by_url 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/infoq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import ssl 7 | 8 | class Infoq(VideoExtractor): 9 | name = "InfoQ" 10 | 11 | stream_types = [ 12 | 
{'id': 'video'}, 13 | {'id': 'audio'}, 14 | {'id': 'slides'} 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | content = get_content(self.url) 19 | self.title = match1(content, r'<title>([^<]+)</title>') 20 | s = match1(content, r'P\.s\s*=\s*\'([^\']+)\'') 21 | scp = match1(content, r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'') 22 | scs = match1(content, r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'') 23 | sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'') 24 | 25 | mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"') 26 | if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 27 | 28 | pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"') 29 | if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf 30 | 31 | # cookie handler 32 | ssl_context = request.HTTPSHandler( 33 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 34 | cookie_handler = request.HTTPCookieProcessor() 35 | opener = request.build_opener(ssl_context, cookie_handler) 36 | opener.addheaders = [ 37 | ('Referer', self.url), 38 | ('Cookie', 39 | 'CloudFront-Policy=%s;CloudFront-Signature=%s;CloudFront-Key-Pair-Id=%s' % (scp, scs, sck)) 40 | ] 41 | request.install_opener(opener) 42 | 43 | if s: self.streams['video'] = {'url': s } 44 | if mp3: self.streams['audio'] = { 'url': mp3 } 45 | if pdf: self.streams['slides'] = { 'url': pdf } 46 | 47 | def extract(self, **kwargs): 48 | for i in self.streams: 49 | s = self.streams[i] 50 | _, s['container'], s['size'] = url_info(s['url']) 51 | s['src'] = [s['url']] 52 | 53 | site = Infoq() 54 | download = site.download_by_url 55 | download_playlist = site.download_by_url 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/instagram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['instagram_download'] 4 | 5 | from ..common import * 6 | 7 | def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87', 10 | 'sec-fetch-mode': 'navigate' # important 11 | } 12 | 13 | url = r1(r'([^?]*)', url) 14 | cont = get_content(url, headers=headers) 15 | 16 | vid = r1(r'instagram.com/\w+/([^/]+)', url) 17 | description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \ 18 | r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies 19 | title = "{} [{}]".format(description.replace("\n", " "), vid) 20 | 21 | appId = r1(r'"appId":"(\d+)"', cont) 22 | media_id = r1(r'"media_id":"(\d+)"', cont) 23 | logging.debug('appId: %s' % appId) 24 | logging.debug('media_id: %s' % media_id) 25 | 26 | api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id 27 | try: 28 | api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) 29 | post = json.loads(api_cont) 30 | except: 31 | log.wtf('[Error] Please specify a cookie file.') 32 | 33 | for item in post['items']: 34 | code = item['code'] 35 | carousel_media = item.get('carousel_media') or [item] 36 | for i, media in enumerate(carousel_media): 37 | title = '%s [%s]' % (code, i) 38 | image_url = media['image_versions2']['candidates'][0]['url'] 39 | ext = image_url.split('?')[0].split('.')[-1] 40 | size = int(get_head(image_url)['Content-Length']) 41 | 42 | print_info(site_info, title, ext, size) 43 | if not info_only: 44 | download_urls(urls=[image_url], 45 | title=title, 46 | ext=ext, 47 | total_size=size, 48 | output_dir=output_dir) 49 | 50 | # 
download videos (if any) 51 | if 'video_versions' in media: 52 | video_url = media['video_versions'][0]['url'] 53 | ext = video_url.split('?')[0].split('.')[-1] 54 | size = int(get_head(video_url)['Content-Length']) 55 | 56 | print_info(site_info, title, ext, size) 57 | if not info_only: 58 | download_urls(urls=[video_url], 59 | title=title, 60 | ext=ext, 61 | total_size=size, 62 | output_dir=output_dir) 63 | 64 | site_info = "Instagram.com" 65 | download = instagram_download 66 | download_playlist = playlist_not_supported('instagram') 67 | -------------------------------------------------------------------------------- /src/you_get/extractors/interest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from json import loads 5 | 6 | def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 7 | #http://ch.interest.me/zhtv/VOD/View/114789 8 | #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 9 | html = get_content(url) 10 | #get title 11 | title = match1(html, r'(.*)', html) 27 | api_url = video_url + '/api/video/' + video_hash 28 | content = get_content(api_url, headers=headers) 29 | data = json.loads(content) 30 | if len(data)<1 : 31 | print('Maybe is Private Video?'+'['+title+']') 32 | return True; 33 | down_urls = 'https:' + data[0]['uri'] 34 | type, ext, size = url_info(down_urls, headers=headers) 35 | print_info(site_info, title+data[0]['resolution'], type, size) 36 | 37 | if not info_only: 38 | download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) 39 | 40 | def download_playlist_by_url( url, **kwargs): 41 | video_page = get_html(url) 42 | url_first=match1(url, r"(http[s]?://[^/]+)") 43 | videos = set(re.findall(r'0): 45 | for video in videos: 46 | iwara_download(url_first+video, **kwargs) 47 | else: 48 | maybe_print('this page not found any videos') 49 | site_info = "Iwara" 50 | download = iwara_download 51 | download_playlist = download_playlist_by_url 52 | -------------------------------------------------------------------------------- /src/you_get/extractors/joy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['joy_download'] 4 | 5 | from ..common import * 6 | 7 | def video_info(channel_id, program_id, volumn_id): 8 | url = 'http://msx.app.joy.cn/service.php' 9 | if program_id: 10 | url += '?action=vodmsxv6' 11 | url += '&channelid=%s' % channel_id 12 | url += '&programid=%s' % program_id 13 | url += '&volumnid=%s' % volumn_id 14 | else: 15 | url += '?action=msxv6' 16 | url += '&videoid=%s' % volumn_id 17 | 18 | xml = get_html(url) 19 | 20 | name = r1(r'(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?', xml) 21 | urls = re.findall(r']*>(?:)?', xml) 22 | hostpath = r1(r']*>(?:)?', xml) 23 | 24 | return name, urls, hostpath 25 | 26 | def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 27 | channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) 28 | program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) 29 | volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) 30 | 31 | title, urls, hostpath = video_info(channel_id, program_id, volumn_id) 32 | urls = [hostpath + url for url in urls] 33 | 34 | size = 0 35 | for url in urls: 36 | _, ext, temp = url_info(url) 37 | size += temp 38 | 39 | print_info(site_info, title, ext, size) 40 | if not info_only: 41 | download_urls(urls, title, ext, size, output_dir = 
output_dir, merge = merge) 42 | 43 | site_info = "Joy.cn" 44 | download = joy_download 45 | download_playlist = playlist_not_supported('joy') 46 | -------------------------------------------------------------------------------- /src/you_get/extractors/kakao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from .universal import * 5 | 6 | __all__ = ['kakao_download'] 7 | 8 | 9 | def kakao_download(url, output_dir='.', info_only=False, **kwargs): 10 | json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}' 11 | 12 | # in this implementation playlist not supported so use url_without_playlist 13 | # if want to support playlist need to change that 14 | if re.search('playlistId', url): 15 | url = re.search(r"(.+)\?.+?", url).group(1) 16 | 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"", page).group(1) 20 | title = re.search(r"", page).group(1) 21 | 22 | meta_str = get_content(json_request_url.format(vid)) 23 | meta_json = json.loads(meta_str) 24 | 25 | standard_preset = meta_json['output_list']['standard_preset'] 26 | output_videos = meta_json['output_list']['output_list'] 27 | size = '' 28 | if meta_json['svcname'] == 'smr_pip': 29 | for v in output_videos: 30 | if v['preset'] == 'mp4_PIP_SMR_480P': 31 | size = int(v['filesize']) 32 | break 33 | else: 34 | for v in output_videos: 35 | if v['preset'] == standard_preset: 36 | size = int(v['filesize']) 37 | break 38 | 39 | video_url = meta_json['location']['url'] 40 | 41 | print_info(site_info, title, 'mp4', size) 42 | if not info_only: 43 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 44 | except: 45 | universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) 46 | 47 | 48 | site_info = "tv.kakao.com" 49 | download = kakao_download 50 | download_playlist = playlist_not_supported('kakao') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/khan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['khan_download'] 4 | 5 | from ..common import * 6 | from .youtube import YouTube 7 | 8 | def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_content(url) 10 | youtube_url = re.search('(.*?)", page).group(1) 23 | size = url_size(video_url) 24 | video_format = "flv"#video_url.split('.')[-1] 25 | print_info(site_info, title, video_format, size) 26 | if not info_only: 27 | download_urls([video_url], title, video_format, size, **kwargs) 28 | except:# extract image 29 | og_image_url = re.search(r"", page).group(1) 30 | image_url = og_image_url 31 | title = url.split('/')[-1] 32 | size = url_size(image_url) 33 | image_format = image_url.split('.')[-1] 34 | print_info(site_info, title, image_format, size) 35 | if not info_only: 36 | download_urls([image_url], title, image_format, size, **kwargs) 37 | 38 | site_info = "kuaishou.com" 39 | download = kuaishou_download_by_url 40 | download_playlist = playlist_not_supported('kuaishou') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/kugou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['kugou_download'] 4 | 5 | from ..common import * 6 | from json import loads 7 | from base64 import 
b64decode 8 | import re 9 | 10 | 11 | def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs): 12 | if url.lower().find("5sing") != -1: 13 | # for 5sing.kugou.com 14 | html = get_html(url) 15 | ticket = r1(r'"ticket":\s*"(.*)"', html) 16 | j = loads(str(b64decode(ticket), encoding="utf-8")) 17 | url = j['file'] 18 | title = j['songName'] 19 | songtype, ext, size = url_info(url) 20 | print_info(site_info, title, songtype, size) 21 | if not info_only: 22 | download_urls([url], title, ext, size, output_dir, merge=merge) 23 | elif url.lower().find("hash") != -1: 24 | return kugou_download_by_hash(url, output_dir, merge, info_only) 25 | else: 26 | # for the www.kugou.com/ 27 | return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) 28 | # raise NotImplementedError(url) 29 | 30 | 31 | def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): 32 | # sample 33 | # url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462 34 | hash_val = match1(url, r'hash=(\w+)') 35 | album_id = match1(url, r'album_id=(\d+)') 36 | if not album_id: 37 | album_id = 123 38 | html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id)) 39 | j = loads(html) 40 | url = j['data']['play_url'] 41 | title = j['data']['audio_name'] 42 | # some songs cann't play because of copyright protection 43 | if (url == ''): 44 | return 45 | songtype, ext, size = url_info(url) 46 | print_info(site_info, title, songtype, size) 47 | if not info_only: 48 | download_urls([url], title, ext, size, output_dir, merge=merge) 49 | 50 | 51 | def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): 52 | urls = [] 53 | 54 | # download music leaderboard 55 | # sample: http://www.kugou.com/yy/html/rank.html 56 | if url.lower().find('rank') != -1: 57 | html = get_html(url) 58 | pattern = re.compile('(.*)") 11 | #to get title 12 | #format =aac|mp3 ->to get aac format=mp3 ->to get mp3 13 | url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid) 14 | songtype, ext, size = url_info(url) 15 | print_info(site_info, title, songtype, size) 16 | if not info_only: 17 | download_urls([url], title, ext, size, output_dir) 18 | 19 | def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | html=get_content(url) 21 | matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated 22 | for rid in matched: 23 | kuwo_download_by_rid(rid,output_dir,merge,info_only) 24 | 25 | 26 | 27 | def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if "www.kuwo.cn/yinyue" in url: 29 | rid=match1(url, r'yinyue/(\d+)') 30 | kuwo_download_by_rid(rid,output_dir, merge, info_only) 31 | else: 32 | kuwo_playlist_download(url,output_dir,merge,info_only) 33 | 34 | site_info = "kuwo.cn" 35 | download = kuwo_download 36 | # download_playlist = playlist_not_supported("kugou") 37 | # download_playlist=playlist_not_supported("kuwo") 38 | download_playlist=kuwo_playlist_download 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/lizhi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['lizhi_download'] 4 | import json 5 | import datetime 6 | from ..common import * 7 | 8 | # 9 | # Worked well but not perfect. 
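# How get_url() below assembles the CDN link, on a made-up episode dict
# ('create_time' is a millisecond timestamp; its date becomes the URL path):
#
#   >>> get_url({'create_time': '1446000000000', 'id': '12345'})
#   'http://cdn5.lizhi.fm/audio/2015/10/28/12345_hd.mp3'
#
# (the exact date in the path depends on the local timezone)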
10 | # TODO: add option --format={sd|hd} 11 | # 12 | def get_url(ep): 13 | readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d') 14 | return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id']) 15 | 16 | # radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ 17 | # 18 | # Returns a list of tuples (audio_id, title, url) for each episode 19 | # (audio) in the radio playlist. url is the direct link to the audio 20 | # file. 21 | def lizhi_extract_playlist_info(radio_id): 22 | # /api/radio_audios API parameters: 23 | # 24 | # - s: starting episode 25 | # - l: count (per page) 26 | # - band: radio_id 27 | # 28 | # We use l=65535 for poor man's pagination (that is, no pagination 29 | # at all -- hope all fits on a single page). 30 | # 31 | # TODO: Use /api/radio?band={radio_id} to get number of episodes 32 | # (au_cnt), then handle pagination properly. 33 | api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id 34 | api_response = json.loads(get_content(api_url)) 35 | return [(ep['id'], ep['name'], get_url(ep)) for ep in api_response] 36 | 37 | def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): 38 | filetype, ext, size = url_info(url) 39 | print_info(site_info, title, filetype, size) 40 | if not info_only: 41 | download_urls([url], title, ext, size, output_dir=output_dir) 42 | 43 | def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs): 44 | # Sample URL: http://www.lizhi.fm/549759/ 45 | radio_id = match1(url,r'/(\d+)') 46 | if not radio_id: 47 | raise NotImplementedError('%s not supported' % url) 48 | for audio_id, title, url in lizhi_extract_playlist_info(radio_id): 49 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 50 | 51 | def lizhi_download(url, output_dir='.', info_only=False, **kwargs): 52 | # Sample URL: http://www.lizhi.fm/549759/18864883431656710/ 53 | m = re.search(r'/(?P\d+)/(?P\d+)', url) 54 | if not m: 55 | raise NotImplementedError('%s not supported' % url) 56 | radio_id = m.group('radio_id') 57 | audio_id = m.group('audio_id') 58 | # Look for the audio_id among the full list of episodes 59 | for aid, title, url in lizhi_extract_playlist_info(radio_id): 60 | if aid == audio_id: 61 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 62 | break 63 | else: 64 | raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id)) 65 | 66 | site_info = "lizhi.fm" 67 | download = lizhi_download 68 | download_playlist = lizhi_download_playlist 69 | -------------------------------------------------------------------------------- /src/you_get/extractors/longzhu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['longzhu_download'] 4 | 5 | import json 6 | from ..common import ( 7 | get_content, 8 | general_m3u8_extractor, 9 | match1, 10 | print_info, 11 | download_urls, 12 | playlist_not_supported, 13 | ) 14 | from ..common import player 15 | 16 | def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): 17 | web_domain = url.split('/')[2] 18 | if (web_domain == 'star.longzhu.com') or (web_domain == 'y.longzhu.com'): 19 | domain = url.split('/')[3].split('?')[0] 20 | m_url = 'http://m.longzhu.com/{0}'.format(domain) 21 | m_html = get_content(m_url) 22 | room_id_patt = r'var\s*roomId\s*=\s*(\d+);' 23 | room_id = match1(m_html,room_id_patt) 24 | 25 | json_url = 
'http://liveapi.plu.cn/liveapp/roomstatus?roomId={0}'.format(room_id) 26 | content = get_content(json_url) 27 | data = json.loads(content) 28 | streamUri = data['streamUri'] 29 | if len(streamUri) <= 4: 30 | raise ValueError('The live stream is not online!') 31 | title = data['title'] 32 | streamer = data['userName'] 33 | title = '{}: {}'.format(streamer, title) 34 | 35 | steam_api_url = 'http://livestream.plu.cn/live/getlivePlayurl?roomId={0}'.format(room_id) 36 | content = get_content(steam_api_url) 37 | data = json.loads(content) 38 | isonline = data.get('isTransfer') 39 | if isonline == '0': 40 | raise ValueError('The live stream is not online!') 41 | 42 | real_url = data['playLines'][0]['urls'][0]['securityUrl'] 43 | 44 | print_info(site_info, title, 'flv', float('inf')) 45 | 46 | if not info_only: 47 | download_urls([real_url], title, 'flv', None, output_dir, merge=merge) 48 | 49 | elif web_domain == 'replay.longzhu.com': 50 | videoid = match1(url, r'(\d+)$') 51 | json_url = 'http://liveapi.longzhu.com/livereplay/getreplayfordisplay?videoId={0}'.format(videoid) 52 | content = get_content(json_url) 53 | data = json.loads(content) 54 | 55 | username = data['userName'] 56 | title = data['title'] 57 | title = '{}:{}'.format(username, title) 58 | real_url = data['videoUrl'] 59 | 60 | if player: 61 | print_info('Longzhu Video', title, 'm3u8', 0) 62 | download_urls([real_url], title, 'm3u8', 0, output_dir, merge=merge) 63 | else: 64 | urls = general_m3u8_extractor(real_url) 65 | print_info('Longzhu Video', title, 'm3u8', 0) 66 | if not info_only: 67 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 68 | 69 | else: 70 | raise ValueError('Wrong url or unsupported link ... {0}'.format(url)) 71 | 72 | site_info = 'longzhu.com' 73 | download = longzhu_download 74 | download_playlist = playlist_not_supported('longzhu') 75 | -------------------------------------------------------------------------------- /src/you_get/extractors/lrts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['lrts_download'] 4 | 5 | import logging 6 | from ..common import * 7 | from ..util import log, term 8 | 9 | def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_html(url) 11 | args = kwargs.get('args') 12 | if not args: args = {} 13 | matched = re.search(r"/book/(\d+)", url) 14 | if not matched: 15 | raise AssertionError("not found book number: %s" % url) 16 | book_no = matched.group(1) 17 | book_title = book_no 18 | matched = re.search(r"<title>([^-]*)[-](.*)[,](.*)</title>", html) 19 | if matched: 20 | book_title = matched.group(1) 21 | 22 | matched = re.search(r"var totalCount='(\d+)'", html) 23 | if not matched: 24 | raise AssertionError("not found total count in html") 25 | total_count = int(matched.group(1)) 26 | log.i('%s total: %s' % (book_title, total_count)) 27 | first_page = 0 28 | if ('first' in args and args.first!= None): 29 | first_page = int(args.first) 30 | 31 | page_size = 10 32 | if ('page_size' in args and args.page_size != None): 33 | page_size = int(args.page_size) 34 | last_page = (total_count // page_size) + 1 35 | if ('last' in args and args.last != None): 36 | last_page = int(args.last) 37 | 38 | log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) 39 | headers = { 40 | 'Referer': url 41 | } 42 | items = [] 43 | for page in range(first_page, last_page): 44 | page_url = 'http://www.lrts.me/ajax/book/%s/%s/%s' % (book_no, page, 
page_size) 45 | response_content = json.loads(post_content(page_url, headers)) 46 | if response_content['status'] != 'success': 47 | raise AssertionError("got the page failed: %s" % (page_url)) 48 | data = response_content['data']['data'] 49 | if data: 50 | for i in data: 51 | i['resName'] = parse.unquote(i['resName']) 52 | items.extend(data) 53 | else: 54 | break 55 | headers = { 56 | 'Referer': 'http://www.lrts.me/playlist' 57 | } 58 | 59 | for item in items: 60 | i_url = 'http://www.lrts.me/ajax/path/4/%s/%s' % (item['fatherResId'], item['resId']) 61 | response_content = json.loads(post_content(i_url, headers)) 62 | if response_content['status'] == 'success' and response_content['data']: 63 | item['ok'] = True 64 | item['url'] = response_content['data'] 65 | logging.debug('ok') 66 | 67 | items = list(filter(lambda i: 'ok' in i and i['ok'], items)) 68 | log.i('Downloading %s: %s count ...' % (book_title, len(items))) 69 | 70 | for item in items: 71 | title = item['resName'] 72 | file_url = item['url'] 73 | # if not file_url: continue 74 | _, _, size = url_info(file_url) 75 | print_info(site_info, title, 'mp3', size) 76 | if not info_only: 77 | download_urls([file_url], title, 'mp3', size, output_dir, merge=merge) 78 | 79 | site_info = "lrts.me" 80 | download = lrts_download 81 | download_playlist = lrts_download 82 | -------------------------------------------------------------------------------- /src/you_get/extractors/magisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['magisto_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) 12 | api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) 13 | content = get_html(api_url) 14 | data = json.loads(content) 15 | title1 = data['title'] 16 | title2 = data['creator'] 17 | title = "%s - %s" % (title1, title2) 18 | url = data['video_direct_url'] 19 | type, ext, size = url_info(url) 20 | 21 | print_info(site_info, title, type, size) 22 | if not info_only: 23 | download_urls([url], title, ext, size, output_dir, merge=merge) 24 | 25 | site_info = "Magisto.com" 26 | download = magisto_download 27 | download_playlist = playlist_not_supported('magisto') 28 | -------------------------------------------------------------------------------- /src/you_get/extractors/metacafe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['metacafe_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from urllib.parse import unquote 8 | 9 | def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.metacafe.com/watch/\w+', url): 11 | html =get_content(url) 12 | title = r1(r'list 43 | Convert XML to URL List. 44 | From Biligrab. 
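Illustrative input (only the tags read below matter):
    <video>
        <durl><url>http://example.com/0.flv</url></durl>
        <durl><url>http://example.com/1.flv</url></durl>
    </video>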
45 | """ 46 | rawurl = [] 47 | dom = parseString(xml_data) 48 | for node in dom.getElementsByTagName('durl'): 49 | url = node.getElementsByTagName('url')[0] 50 | rawurl.append(url.childNodes[0].data) 51 | return rawurl 52 | 53 | site_info = "MioMio.tv" 54 | download = miomio_download 55 | download_playlist = playlist_not_supported('miomio') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/mixcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mixcloud_download'] 4 | 5 | from ..common import * 6 | 7 | def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url, faker=True) 9 | title = r1(r'(.*?)").split("|")[:-2])) 16 | 17 | # mgid%3Auma%3Avideo%3Amtv81.com%3A897974 18 | vid = match1(html, r'getTheVideo\("(.*?)"') 19 | xml = parseString( 20 | get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid))) 21 | 22 | url = sorted( 23 | map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")), 24 | key=lambda x: int(match1(x, r'_(\d+?)_')))[-1] 25 | 26 | mediatype, ext, size = 'mp4', 'mp4', 0 27 | print_info(site_info, title, mediatype, size) 28 | # 29 | # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf 30 | # 31 | # because rtmpdump is unstable,may try several times 32 | # 33 | if not info_only: 34 | # import pdb 35 | # pdb.set_trace() 36 | download_rtmp_url(url=url, title=title, ext=ext, params={ 37 | "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir) 38 | 39 | 40 | site_info = "mtv81.com" 41 | download = mtv81_download 42 | download_playlist = playlist_not_supported('mtv81') 43 | -------------------------------------------------------------------------------- /src/you_get/extractors/nanagogo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nanagogo_download'] 4 | 5 | from ..common import * 6 | from .universal import * 7 | 8 | def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | if re.match(r'https?://stat.7gogo.jp', url): 10 | universal_download(url, output_dir, merge=merge, info_only=info_only) 11 | return 12 | 13 | talk_id = r1(r'7gogo.jp/([^/]+)/', url) 14 | post_id = r1(r'7gogo.jp/[^/]+/(\d+)', url) 15 | title = '%s_%s' % (talk_id, post_id) 16 | api_url = 'https://api.7gogo.jp/web/v2/talks/%s/posts/%s' % (talk_id, post_id) 17 | info = json.loads(get_content(api_url)) 18 | 19 | items = [] 20 | if info['data']['posts']['post'] is None: 21 | return 22 | if info['data']['posts']['post']['body'] is None: 23 | return 24 | for i in info['data']['posts']['post']['body']: 25 | if 'image' in i: 26 | image_url = i['image'] 27 | if image_url[:2] == '//': continue # skip stamp images 28 | _, ext, size = url_info(image_url) 29 | items.append({'title': title, 30 | 'url': image_url, 31 | 'ext': ext, 32 | 'size': size}) 33 | elif 'movieUrlHq' in i: 34 | movie_url = i['movieUrlHq'] 35 | _, ext, size = url_info(movie_url) 36 | items.append({'title': title, 37 | 'url': movie_url, 38 | 'ext': ext, 39 | 'size': size}) 40 | 41 | 
size = sum([i['size'] for i in items]) 42 | if size == 0: return # do not fail the whole process 43 | print_info(site_info, title, ext, size) 44 | if not info_only: 45 | for i in items: 46 | print_info(site_info, i['title'], i['ext'], i['size']) 47 | download_urls([i['url']], i['title'], i['ext'], i['size'], 48 | output_dir=output_dir, 49 | merge=merge) 50 | 51 | site_info = "7gogo.jp" 52 | download = nanagogo_download 53 | download_playlist = playlist_not_supported('nanagogo') 54 | -------------------------------------------------------------------------------- /src/you_get/extractors/naver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 10 | from .universal import * 11 | 12 | __all__ = ['naver_download_by_url'] 13 | 14 | 15 | def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): 16 | ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) 20 | key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) 21 | meta_str = get_content(ep.format(vid, key)) 22 | meta_json = json.loads(meta_str) 23 | if 'errorCode' in meta_json: 24 | log.wtf(meta_json['errorCode']) 25 | title = meta_json['meta']['subject'] 26 | videos = meta_json['videos']['list'] 27 | video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) 28 | video_url = video_list[-1]['source'] 29 | # size = video_list[-1]['size'] 30 | # result wrong size 31 | size = url_size(video_url) 32 | print_info(site_info, title, 'mp4', size) 33 | if not info_only: 34 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 35 | except: 36 | universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) 37 | 38 | site_info = "naver.com" 39 | download = naver_download_by_url 40 | download_playlist = playlist_not_supported('naver') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/nicovideo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nicovideo_download'] 4 | 5 | from ..common import * 6 | 7 | def nicovideo_login(user, password): 8 | data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In" 9 | response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8'))) 10 | return response.headers 11 | 12 | def nicovideo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | import ssl 14 | ssl_context = request.HTTPSHandler( 15 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 16 | cookie_handler = request.HTTPCookieProcessor() 17 | opener = request.build_opener(ssl_context, cookie_handler) 18 | request.install_opener(opener) 19 | 20 | import netrc, getpass 21 | try: 22 | info = netrc.netrc().authenticators('nicovideo') 23 | except: 24 | info = None 25 | if info is None: 26 | user = input("User: ") 27 | password = getpass.getpass("Password: ") 28 | else: 29 | user, password = info[0], info[2] 30 | print("Logging in...") 31 | nicovideo_login(user, password) 32 | 33 | html = get_html(url) # necessary! 
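# (presumably what makes the fetch above necessary: it attaches the login
# cookies to the installed opener session that the getflv API call below
# relies on)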
34 | title = r1(r'(.+?)', html) 35 | #title = unicodize(r1(r']*>([^<]+)', html)) 36 | 37 | vid = url.split('/')[-1].split('?')[0] 38 | api_html = get_html('http://flapi.nicovideo.jp/api/getflv?v=%s' % vid) 39 | real_url = parse.unquote(r1(r'url=([^&]+)&', api_html)) 40 | 41 | type, ext, size = url_info(real_url) 42 | 43 | print_info(site_info, title, type, size) 44 | if not info_only: 45 | download_urls([real_url], title, ext, size, output_dir, merge = merge) 46 | 47 | site_info = "Nicovideo.jp" 48 | download = nicovideo_download 49 | download_playlist = playlist_not_supported('nicovideo') 50 | -------------------------------------------------------------------------------- /src/you_get/extractors/pinterest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | class Pinterest(VideoExtractor): 7 | # site name 8 | name = "Pinterest" 9 | 10 | # ordered list of supported stream types / qualities on this site 11 | # order: high quality -> low quality 12 | stream_types = [ 13 | {'id': 'original'}, # contains an 'id' or 'itag' field at minimum 14 | {'id': 'small'}, 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | # scrape the html 19 | content = get_content(self.url) 20 | 21 | # extract title 22 | self.title = match1(content, 23 | r' 0: 65 | for k , v in content['data']['rtmp_multi_bitrate'].items(): 66 | stream_available[k] = rtmp_url + '/' + v 67 | 68 | for s in self.stream_types: 69 | if s['id'] in stream_available.keys(): 70 | quality_id = s['id'] 71 | url = stream_available[quality_id] 72 | self.streams[quality_id] = { 73 | 'container': 'flv', 74 | 'video_profile': s['video_profile'], 75 | 'size': 0, 76 | 'url': url 77 | } 78 | 79 | def extract(self, **kwargs): 80 | for i in self.streams: 81 | s = self.streams[i] 82 | s['src'] = [s['url']] 83 | if 'stream_id' in kwargs and kwargs['stream_id']: 84 | # Extract the stream 85 | stream_id = kwargs['stream_id'] 86 | 87 | if stream_id not in self.streams: 88 | log.e('[Error] Invalid video format.') 89 | log.e('Run \'-i\' command with no specific video format to view all available formats.') 90 | exit(2) 91 | else: 92 | # Extract stream with the best quality 93 | stream_id = self.streams_sorted[0]['id'] 94 | s['src'] = [s['url']] 95 | 96 | site = QiE() 97 | download = site.download_by_url 98 | download_playlist = playlist_not_supported('QiE') 99 | -------------------------------------------------------------------------------- /src/you_get/extractors/qie_video.py: -------------------------------------------------------------------------------- 1 | from ..common import * 2 | from ..extractor import VideoExtractor 3 | from ..util.log import * 4 | 5 | import json 6 | import math 7 | 8 | class QieVideo(VideoExtractor): 9 | name = 'QiE Video' 10 | vid_patt = r'"stream_name":"(\d+)"' 11 | title_patt = r'"title":"([^\"]+)"' 12 | cdn = 'http://qietv-play.wcs.8686c.com/' 13 | ep = 'http://api.qiecdn.com/api/v1/video/stream/{}' 14 | stream_types = [ 15 | {'id':'1080p', 'video_profile':'1920x1080', 'container':'m3u8'}, 16 | {'id':'720p', 'video_profile':'1280x720', 'container':'m3u8'}, 17 | {'id':'480p', 'video_profile':'853x480', 'container':'m3u8'} 18 | ] 19 | 20 | def get_vid_from_url(self): 21 | hit = re.search(self.__class__.vid_patt, self.page) 22 | if hit is None: 23 | log.wtf('Cannot get stream_id') 24 | return hit.group(1) 25 | 26 | def get_title(self): 27 | hit = re.search(self.__class__.title_patt, self.page) 
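# fall back to the numeric stream id as the title when the page carries none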
28 | if hit is None: 29 | return self.vid 30 | return hit.group(1).strip() 31 | 32 | def prepare(self, **kwargs): 33 | self.page = get_content(self.url) 34 | if self.vid is None: 35 | self.vid = self.get_vid_from_url() 36 | self.title = self.get_title() 37 | meta = json.loads(get_content(self.__class__.ep.format(self.vid))) 38 | if meta['code'] != 200: 39 | log.wtf(meta['message']) 40 | for video in meta['result']['videos']: 41 | height = video['height'] 42 | url = self.__class__.cdn + video['key'] 43 | stream_meta = dict(m3u8_url=url, size=0, container='m3u8') 44 | video_profile = '{}x{}'.format(video['width'], video['height']) 45 | stream_meta['video_profile'] = video_profile 46 | for stream_type in self.__class__.stream_types: 47 | if height // 10 == int(stream_type['id'][:-1]) // 10: 48 | # width 481, 482... 489 are all 480p here 49 | stream_id = stream_type['id'] 50 | self.streams[stream_id] = stream_meta 51 | 52 | def extract(self, **kwargs): 53 | for stream_id in self.streams: 54 | self.streams[stream_id]['src'], dur = general_m3u8_extractor(self.streams[stream_id]['m3u8_url']) 55 | self.streams[stream_id]['video_profile'] += ', Duration: {}s'.format(math.floor(dur)) 56 | 57 | def general_m3u8_extractor(url): 58 | dur = 0 59 | base_url = url[:url.rfind('/')] 60 | m3u8_content = get_content(url).split('\n') 61 | result = [] 62 | for line in m3u8_content: 63 | trimmed = line.strip() 64 | if len(trimmed) > 0: 65 | if trimmed.startswith('#'): 66 | if trimmed.startswith('#EXTINF'): 67 | t_str = re.search(r'(\d+\.\d+)', trimmed).group(1) 68 | dur += float(t_str) 69 | else: 70 | if trimmed.startswith('http'): 71 | result.append(trimmed) 72 | else: 73 | result.append(base_url + '/' + trimmed) 74 | return result, dur 75 | 76 | site = QieVideo() 77 | download_by_url = site.download_by_url 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/qingting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from ..common import get_content, playlist_not_supported, url_size 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | 8 | __all__ = ['qingting_download_by_url'] 9 | 10 | 11 | class Qingting(VideoExtractor): 12 | # every resource is described by its channel id and program id 13 | # so vid is tuple (channel_id, program_id) 14 | 15 | name = 'Qingting' 16 | stream_types = [ 17 | {'id': '_default'} 18 | ] 19 | 20 | ep = 'http://i.qingting.fm/wapi/channels/{}/programs/{}' 21 | file_host = 'http://od.qingting.fm/{}' 22 | mobile_pt = r'channels\/(\d+)\/programs/(\d+)' 23 | 24 | def prepare(self, **kwargs): 25 | if self.vid is None: 26 | hit = re.search(self.__class__.mobile_pt, self.url) 27 | self.vid = (hit.group(1), hit.group(2)) 28 | 29 | ep_url = self.__class__.ep.format(self.vid[0], self.vid[1]) 30 | meta = json.loads(get_content(ep_url)) 31 | 32 | if meta['code'] != 0: 33 | log.wtf(meta['message']['errormsg']) 34 | 35 | file_path = self.__class__.file_host.format(meta['data']['file_path']) 36 | self.title = meta['data']['name'] 37 | duration = str(meta['data']['duration']) + 's' 38 | 39 | self.streams['_default'] = {'src': [file_path], 'video_profile': duration, 'container': 'm4a'} 40 | 41 | def extract(self, **kwargs): 42 | self.streams['_default']['size'] = url_size(self.streams['_default']['src'][0]) 43 | 44 | 45 | def qingting_download_by_url(url, **kwargs): 46 | Qingting().download_by_url(url, **kwargs) 47 | 48 | site_info = 'Qingting' 49 | 
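# A minimal usage sketch (the channel/program ids below are made up; any URL
# matching mobile_pt above resolves the same way):
#
#   qingting_download_by_url(
#       'http://m.qingting.fm/channels/123456/programs/7891011/',
#       info_only=True)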
download = qingting_download_by_url 50 | download_playlist = playlist_not_supported('Qingting') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/qq_egame.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from ..common import * 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | from ..util.strings import unescape_html 8 | 9 | __all__ = ['qq_egame_download'] 10 | 11 | 12 | def qq_egame_download(url, 13 | output_dir='.', 14 | merge=True, 15 | info_only=False, 16 | **kwargs): 17 | uid = re.search('\d\d\d+', url) 18 | an_url = "https://m.egame.qq.com/live?anchorid={}&".format(uid.group(0)) 19 | page = get_content(an_url) 20 | server_data = re.search(r'window\.serverData\s*=\s*({.+?});', page) 21 | if server_data is None: 22 | log.wtf('Can not find window.server_data') 23 | json_data = json.loads(server_data.group(1)) 24 | if json_data['anchorInfo']['data']['isLive'] == 0: 25 | log.wtf('Offline...') 26 | live_info = json_data['liveInfo']['data'] 27 | title = '{}_{}'.format(live_info['profileInfo']['nickName'], 28 | live_info['videoInfo']['title']) 29 | real_url = live_info['videoInfo']['streamInfos'][0]['playUrl'] 30 | 31 | print_info(site_info, title, 'flv', float('inf')) 32 | if not info_only: 33 | download_url_ffmpeg( 34 | real_url, 35 | title, 36 | 'flv', 37 | params={}, 38 | output_dir=output_dir, 39 | merge=merge) 40 | 41 | 42 | site_info = "egame.qq.com" 43 | download = qq_egame_download 44 | download_playlist = playlist_not_supported('qq_egame') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/showroom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['showroom_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from json import loads 8 | from time import time, sleep 9 | 10 | #---------------------------------------------------------------------- 11 | def showroom_get_roomid_by_room_url_key(room_url_key): 12 | """str->str""" 13 | fake_headers_mobile = { 14 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 15 | 'Accept-Charset': 'UTF-8,*;q=0.5', 16 | 'Accept-Encoding': 'gzip,deflate,sdch', 17 | 'Accept-Language': 'en-US,en;q=0.8', 18 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' 19 | } 20 | webpage_url = 'https://www.showroom-live.com/' + room_url_key 21 | html = get_content(webpage_url, headers = fake_headers_mobile) 22 | roomid = match1(html, r'room\?room_id\=(\d+)') 23 | assert roomid 24 | return roomid 25 | 26 | def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs): 27 | '''Source: Android mobile''' 28 | while True: 29 | timestamp = str(int(time() * 1000)) 30 | api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp) 31 | html = get_content(api_endpoint) 32 | html = json.loads(html) 33 | #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 
'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]} 34 | if len(html) >= 1: 35 | break 36 | log.w('The live show is currently offline.') 37 | sleep(1) 38 | 39 | #This is mainly for testing the M3U FFmpeg parser so I would ignore any non-m3u ones 40 | stream_url = [i['url'] for i in html['streaming_url_list'] if i['is_default'] and i['type'] == 'hls'][0] 41 | 42 | assert stream_url 43 | 44 | #title 45 | title = '' 46 | profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id) 47 | html = loads(get_content(profile_api)) 48 | try: 49 | title = html['main_name'] 50 | except KeyError: 51 | title = 'Showroom_{room_id}'.format(room_id = room_id) 52 | 53 | type_, ext, size = url_info(stream_url) 54 | print_info(site_info, title, type_, size) 55 | if not info_only: 56 | download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir) 57 | 58 | 59 | #---------------------------------------------------------------------- 60 | def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 61 | """""" 62 | if re.match( r'(\w+)://www.showroom-live.com/([-\w]+)', url): 63 | room_url_key = match1(url, r'\w+://www.showroom-live.com/([-\w]+)') 64 | room_id = showroom_get_roomid_by_room_url_key(room_url_key) 65 | showroom_download_by_room_id(room_id, output_dir, merge, 66 | info_only) 67 | 68 | site_info = "Showroom" 69 | download = showroom_download 70 | download_playlist = playlist_not_supported('showroom') 71 | -------------------------------------------------------------------------------- /src/you_get/extractors/sina.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] 4 | 5 | from ..common import * 6 | from ..util.log import * 7 | 8 | from hashlib import md5 9 | from random import randint 10 | from time import time 11 | from xml.dom.minidom import parseString 12 | import urllib.parse 13 | 14 | def api_req(vid): 15 | rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) 16 | t = str(int('{0:b}'.format(int(time()))[:-6], 2)) 17 | k = md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t 18 | url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, k) 19 | xml = get_content(url, headers=fake_headers) 20 | return xml 21 | 22 | def video_info(xml): 23 | video = parseString(xml).getElementsByTagName('video')[0] 24 | result = video.getElementsByTagName('result')[0] 25 | if result.firstChild.nodeValue == 'error': 26 | message = video.getElementsByTagName('message')[0] 27 | return None, message.firstChild.nodeValue, None 28 | vname = video.getElementsByTagName('vname')[0].firstChild.nodeValue 29 | durls = video.getElementsByTagName('durl') 30 | 31 | urls = [] 32 | size = 0 33 | for durl in durls: 34 | url = 
durl.getElementsByTagName('url')[0].firstChild.nodeValue 35 | seg_size = durl.getElementsByTagName('filesize')[0].firstChild.nodeValue 36 | urls.append(url) 37 | size += int(seg_size) 38 | 39 | return urls, vname, size 40 | 41 | def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): 42 | """Downloads a Sina video by its unique vid. 43 | http://video.sina.com.cn/ 44 | """ 45 | xml = api_req(vid) 46 | urls, name, size = video_info(xml) 47 | if urls is None: 48 | log.wtf(name) 49 | title = name 50 | print_info(site_info, title, 'flv', size) 51 | if not info_only: 52 | download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) 53 | 54 | def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False): 55 | """Downloads a Sina video by its unique vkey. 56 | http://video.sina.com/ 57 | """ 58 | 59 | url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey 60 | type, ext, size = url_info(url) 61 | 62 | print_info(site_info, title, 'flv', size) 63 | if not info_only: 64 | download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) 65 | 66 | def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs): 67 | ep = 'http://s.video.sina.com.cn/video/play?video_id=' 68 | frag = urllib.parse.urlparse(url).fragment 69 | if not frag: 70 | log.wtf('No video specified with fragment') 71 | meta = json.loads(get_content(ep + frag)) 72 | if meta['code'] != 1: 73 | # Yes they use 1 for success. 74 | log.wtf(meta['message']) 75 | title = meta['data']['title'] 76 | videos = sorted(meta['data']['videos'], key = lambda i: int(i['size'])) 77 | 78 | if len(videos) == 0: 79 | log.wtf('No video file returned by API server') 80 | 81 | vid = videos[-1]['file_id'] 82 | container = videos[-1]['type'] 83 | size = int(videos[-1]['size']) 84 | 85 | if container == 'hlv': 86 | container = 'flv' 87 | 88 | urls, _, _ = video_info(api_req(vid)) 89 | print_info(site_info, title, container, size) 90 | if not info_only: 91 | download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs) 92 | return 93 | 94 | def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 95 | """Downloads Sina videos by URL. 
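vid resolution order, as implemented below:
1. a 'vid=' query parameter in the URL;
2. 'hd_vid' / 'vid' variables scraped from the page;
3. a 'vkey' scraped from the page;
4. a numeric URL fragment ('#12345') as a last resort.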
96 | """ 97 | if 'news.sina.com.cn/zxt' in url: 98 | sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 99 | return 100 | 101 | vid = match1(url, r'vid=(\d+)') 102 | if vid is None: 103 | video_page = get_content(url) 104 | vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'') 105 | if hd_vid == '0': 106 | vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|') 107 | vid = vids[-1] 108 | 109 | if vid is None: 110 | vid = match1(video_page, r'vid:"?(\d+)"?') 111 | if vid: 112 | #title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') 113 | sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) 114 | else: 115 | vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') 116 | if vkey is None: 117 | vid = match1(url, r'#(\d+)') 118 | sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) 119 | return 120 | title = match1(video_page, r'title\s*:\s*"([^"]+)"') 121 | sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only) 122 | 123 | site_info = "Sina.com" 124 | download = sina_download 125 | download_playlist = playlist_not_supported('sina') 126 | -------------------------------------------------------------------------------- /src/you_get/extractors/sohu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sohu_download'] 4 | 5 | from ..common import * 6 | 7 | import json 8 | 9 | ''' 10 | Changelog: 11 | 1. http://tv.sohu.com/upload/swf/20150604/Main.swf 12 | new api 13 | ''' 14 | 15 | 16 | def real_url(fileName, key, ch): 17 | url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n" 18 | return json.loads(get_html(url))['servers'][0]['url'] 19 | 20 | 21 | def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs): 22 | if re.match(r'http://share.vrs.sohu.com', url): 23 | vid = r1(r'id=(\d+)', url) 24 | else: 25 | html = get_html(url) 26 | vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html) 27 | assert vid 28 | 29 | if extractor_proxy: 30 | set_proxy(tuple(extractor_proxy.split(":"))) 31 | info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) 32 | if info and info.get("data", ""): 33 | for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]: 34 | if 'data' in info: 35 | hqvid = info['data'][qtyp] 36 | else: 37 | hqvid = info[qtyp] 38 | if hqvid != 0 and hqvid != vid: 39 | info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) 40 | if not 'allot' in info: 41 | continue 42 | break 43 | if extractor_proxy: 44 | unset_proxy() 45 | host = info['allot'] 46 | prot = info['prot'] 47 | tvid = info['tvid'] 48 | urls = [] 49 | data = info['data'] 50 | title = data['tvName'] 51 | size = sum(data['clipsBytes']) 52 | assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) 53 | for fileName, key in zip(data['su'], data['ck']): 54 | urls.append(real_url(fileName, key, data['ch'])) 55 | # assert data['clipsURL'][0].endswith('.mp4') 56 | 57 | else: 58 | info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) 59 | host = info['allot'] 60 | prot = info['prot'] 61 | tvid = info['tvid'] 62 | urls = [] 63 | data = info['data'] 64 | title = data['tvName'] 65 | size = 
sum(map(int, data['clipsBytes'])) 66 | assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) 67 | for fileName, key in zip(data['su'], data['ck']): 68 | urls.append(real_url(fileName, key, data['ch'])) 69 | 70 | print_info(site_info, title, 'mp4', size) 71 | if not info_only: 72 | download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge) 73 | 74 | 75 | site_info = "Sohu.com" 76 | download = sohu_download 77 | download_playlist = playlist_not_supported('sohu') 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/soundcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sndcd_download'] 4 | 5 | from ..common import * 6 | import re 7 | import json 8 | 9 | 10 | def get_sndcd_apikey(): 11 | home_page = get_content('https://soundcloud.com') 12 | js_url = re.findall(r'script crossorigin src="(.+?)">', home_page)[-1] 13 | 14 | client_id = get_content(js_url) 15 | return re.search(r'client_id:"(.+?)"', client_id).group(1) 16 | 17 | 18 | def get_resource_info(resource_url, client_id): 19 | cont = get_content(resource_url, decoded=True) 20 | 21 | x = re.escape('forEach(function(e){n(e)})}catch(e){}})},') 22 | x = re.search(r'' + x + r'(.*)\);', cont) 23 | 24 | info = json.loads(x.group(1))[-1]['data'][0] 25 | 26 | info = info['tracks'] if info.get('track_count') else [info] 27 | 28 | ids = [i['id'] for i in info if i.get('comment_count') is None] 29 | ids = list(map(str, ids)) 30 | ids_split = ['%2C'.join(ids[i:i+10]) for i in range(0, len(ids), 10)] 31 | api_url = 'https://api-v2.soundcloud.com/tracks?ids={ids}&client_id={client_id}&%5Bobject%20Object%5D=&app_version=1584348206&app_locale=en' 32 | 33 | res = [] 34 | for ids in ids_split: 35 | uri = api_url.format(ids=ids, client_id=client_id) 36 | cont = get_content(uri, decoded=True) 37 | res += json.loads(cont) 38 | 39 | res = iter(res) 40 | info = [next(res) if i.get('comment_count') is None else i for i in info] 41 | 42 | return info 43 | 44 | 45 | def sndcd_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 46 | client_id = get_sndcd_apikey() 47 | 48 | r_info = get_resource_info(url, client_id) 49 | 50 | for info in r_info: 51 | title = info['title'] 52 | metadata = info.get('publisher_metadata') 53 | 54 | transcodings = info['media']['transcodings'] 55 | sq = [i for i in transcodings if i['quality'] == 'sq'] 56 | hq = [i for i in transcodings if i['quality'] == 'hq'] 57 | # source url 58 | surl = sq[0] if hq == [] else hq[0] 59 | surl = surl['url'] 60 | 61 | uri = surl + '?client_id=' + client_id 62 | r = get_content(uri) 63 | surl = json.loads(r)['url'] 64 | 65 | m3u8 = get_content(surl) 66 | # url list 67 | urll = re.findall(r'http.*?(?=\n)', m3u8) 68 | 69 | size = urls_size(urll) 70 | print_info(site_info, title, 'audio/mpeg', size) 71 | print(end='', flush=True) 72 | 73 | if not info_only: 74 | download_urls(urll, title=title, ext='mp3', total_size=size, output_dir=output_dir, merge=True) 75 | 76 | 77 | site_info = "SoundCloud.com" 78 | download = sndcd_download 79 | download_playlist = sndcd_download 80 | -------------------------------------------------------------------------------- /src/you_get/extractors/suntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['suntv_download'] 4 | 5 | from ..common import * 6 | import urllib 7 | import re 8 | 9 | def 
suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.isuntv.com/\w+', url): 11 | API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" 12 | 13 | itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') 14 | values = {"itemid" : itemid, "vodid": ""} 15 | 16 | data = str(values).replace("'", '"') 17 | data = data.encode('utf-8') 18 | req = urllib.request.Request(API_URL, data) 19 | req.add_header('AjaxPro-Method', 'ToPlay') # this header is required by the endpoint 20 | resp = urllib.request.urlopen(req) 21 | respData = resp.read() 22 | respData = respData.decode('ascii').strip('"') # the API returns the path wrapped in quotes; strip them 23 | 24 | video_url = 'http://www.isuntv.com' + str(respData) 25 | 26 | html = get_content(url, decoded=False) 27 | html = html.decode('gbk') 28 | title = match1(html, '<title>([^<]+)</title>').strip() # get rid of \r\n's 29 | 30 | size = 0 31 | type, ext, size = url_info(video_url) 32 | 33 | print_info(site_info, title, type, size) 34 | if not info_only: 35 | download_urls([video_url], title, 'mp4', size, output_dir, merge=merge) 36 | 37 | site_info = "SunTV" 38 | download = suntv_download 39 | download_playlist = playlist_not_supported('suntv') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/ted.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ted_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' 11 | metadata = json.loads('{' + match1(html, patt) + '}') 12 | title = metadata['talks'][0]['title'] 13 | nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] 14 | for quality in ['high', 'medium', 'low']: 15 | if quality in nativeDownloads: 16 | url = nativeDownloads[quality] 17 | type, ext, size = url_info(url) 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge=merge) 21 | break 22 | 23 | site_info = "TED.com" 24 | download = ted_download 25 | download_playlist = playlist_not_supported('ted') 26 | -------------------------------------------------------------------------------- /src/you_get/extractors/theplatform.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | 5 | def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs): 6 | smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid 7 | smil = get_content(smil_url) 8 | smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"')) 9 | smil_videos = {y: x for x, y in dict(re.findall(r'<video src="([^"]+)".+height="([^"]+)"', smil)).items()} 10 | for height in ['1080', '720', '480', '360', '240', '216']: 11 | if height in smil_videos: 12 | smil_video = smil_videos[height] 13 | break 14 | assert smil_video 15 | 16 | type, ext, size = 'mp4', 'mp4', 0 17 | 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_rtmp_url(url=smil_base, title=title, ext=ext, params={"-y": ext + ':' + smil_video}, output_dir=output_dir) 21 | 22 | site_info = "thePlatform.com" 23 | download = theplatform_download_by_pid 24 | download_playlist = playlist_not_supported('theplatform') 25 | 
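# A self-contained sketch, added for illustration and not part of the original
# module: the quality-selection pattern theplatform_download_by_pid uses above
# is "walk a fixed preference list, take the first height the SMIL actually
# offered". The helper name and the sample dict in the assert are made up.
def _pick_best_height(smil_videos, preferred=('1080', '720', '480', '360', '240', '216')):
    for height in preferred:
        if height in smil_videos:
            return smil_videos[height]
    raise KeyError('no known height among %s' % sorted(smil_videos))

assert _pick_best_height({'480': 'low.mp4', '720': 'hi.mp4'}) == 'hi.mp4'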
-------------------------------------------------------------------------------- /src/you_get/extractors/tiktok.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tiktok_download'] 4 | 5 | from ..common import * 6 | 7 | def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 10 | 'Accept-Encoding': 'gzip, deflate', 11 | 'Accept': '*/*', 12 | 'Referer': 'https://www.tiktok.com/', 13 | 'Connection': 'keep-alive' # important 14 | } 15 | 16 | m = re.match('(https?://)?([^/]+)(/.*)', url) 17 | host = m.group(2) 18 | if host != 'www.tiktok.com': # non-canonical URL 19 | if host == 'vt.tiktok.com': # short URL 20 | url = get_location(url) 21 | vid = r1(r'/video/(\d+)', url) 22 | url = 'https://www.tiktok.com/@/video/%s/' % vid 23 | host = 'www.tiktok.com' 24 | else: 25 | url = m.group(3).split('?')[0] 26 | vid = url.split('/')[3] # should be a string of numbers 27 | 28 | html, set_cookie = getHttps(host, url, headers=headers) 29 | tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie) 30 | headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token 31 | 32 | data = r1(r'<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application/json">(.*?)</script>', html) 33 | info = json.loads(data) 34 | itemStruct = info['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct'] 35 | downloadAddr = itemStruct['video']['downloadAddr'] 36 | author = itemStruct['author']['uniqueId'] 37 | nickname = itemStruct['author']['nickname'] 38 | title = '%s [%s]' % (nickname or author, vid) 39 | 40 | mime, ext, size = url_info(downloadAddr, headers=headers) 41 | 42 | print_info(site_info, title, mime, size) 43 | if not info_only: 44 | download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) 45 | 46 | site_info = "TikTok.com" 47 | download = tiktok_download 48 | download_playlist = playlist_not_supported('tiktok') 49 | -------------------------------------------------------------------------------- /src/you_get/extractors/toutiao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import binascii 3 | import random 4 | from json import loads 5 | from urllib.parse import urlparse 6 | 7 | from ..common import * 8 | 9 | try: 10 | from base64 import decodebytes 11 | except ImportError: 12 | from base64 import decodestring 13 | 14 | decodebytes = decodestring 15 | 16 | __all__ = ['toutiao_download', ] 17 | 18 | 19 | def random_with_n_digits(n): 20 | return random.randint(10 ** (n - 1), (10 ** n) - 1) 21 | 22 | 23 | def sign_video_url(vid): 24 | r = str(random_with_n_digits(16)) 25 | 26 | url = 'https://ib.365yg.com/video/urls/v/1/toutiao/mp4/{vid}'.format(vid=vid) 27 | n = urlparse(url).path + '?r=' + r 28 | b_n = bytes(n, encoding="utf-8") 29 | s = binascii.crc32(b_n) 30 | aid = 1364 31 | ts = int(time.time() * 1000) 32 | return url + '?r={r}&s={s}&aid={aid}&vfrom=xgplayer&callback=axiosJsonpCallback1&_={ts}'.format(r=r, s=s, aid=aid, 33 | ts=ts) 34 | 35 | 36 | class ToutiaoVideoInfo(object): 37 | 38 | def __init__(self): 39 | self.bitrate = None 40 | self.definition = None 41 | self.size = None 42 | self.height = None 43 | self.width = None 44 | self.type = None 45 | self.url = None 46 | 47 | def __str__(self): 48 | return json.dumps(self.__dict__) 49 | 50 | 51 | def 
get_file_by_vid(video_id): 52 | vRet = [] 53 | url = sign_video_url(video_id) 54 | ret = get_content(url) 55 | ret = loads(ret[20:-1]) 56 | vlist = ret.get('data').get('video_list') 57 | if len(vlist) > 0: 58 | vInfo = vlist.get(sorted(vlist.keys(), reverse=True)[0]) 59 | vUrl = vInfo.get('main_url') 60 | vUrl = decodebytes(vUrl.encode('ascii')).decode('ascii') 61 | videoInfo = ToutiaoVideoInfo() 62 | videoInfo.bitrate = vInfo.get('bitrate') 63 | videoInfo.definition = vInfo.get('definition') 64 | videoInfo.size = vInfo.get('size') 65 | videoInfo.height = vInfo.get('vheight') 66 | videoInfo.width = vInfo.get('vwidth') 67 | videoInfo.type = vInfo.get('vtype') 68 | videoInfo.url = vUrl 69 | vRet.append(videoInfo) 70 | return vRet 71 | 72 | 73 | def toutiao_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 74 | html = get_html(url, faker=True) 75 | video_id = match1(html, r".*?videoId: '(?P<vid>.*)'") 76 | title = match1(html, '.*?<title>(?P<title>.*?)') 77 | video_file_list = get_file_by_vid(video_id) # 调api获取视频源文件 78 | type, ext, size = url_info(video_file_list[0].url, faker=True) 79 | print_info(site_info=site_info, title=title, type=type, size=size) 80 | if not info_only: 81 | download_urls([video_file_list[0].url], title, ext, size, output_dir, merge=merge, faker=True) 82 | 83 | 84 | site_info = "Toutiao.com" 85 | download = toutiao_download 86 | download_playlist = playlist_not_supported("toutiao") 87 | -------------------------------------------------------------------------------- /src/you_get/extractors/tucao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tucao_download'] 4 | from ..common import * 5 | # import re 6 | import random 7 | import time 8 | from xml.dom import minidom 9 | #possible raw list types 10 | #1.
<li>type=tudou&vid=199687639</li>
11 | #2. <li>type=tudou&vid=199506910|</li>
12 | #3. <li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|</li>
13 | #4 may ? <li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|xx**type=&vid=?</li>
14 | #5. <li>type=tudou&vid=200003098|07**type=tudou&vid=200000350|08</li>
15 | #6. <li>vid=49454694&type=sina|</li>
16 | #7. <li>type=189&vid=513031813243909|</li>
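# Illustration only, not part of the original module: a minimal sketch of how
# tucao_download at the bottom of this file takes one of the raw lists above
# apart. The default sample is raw list type 5; the helper name is ours.
def _split_raw_list_example(raw_list="type=tudou&vid=200003098|07**type=tudou&vid=200000350|08"):
    """Yield (format_link, sub_title) pairs, e.g. ("type=tudou&vid=200003098", "07")."""
    for part in raw_list.split("**"):              # "**" separates multi-part videos
        format_link, sub_title = part.split("|")   # "|" separates the link from the part label
        yield format_link, sub_title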
17 | # re_pattern=re.compile(r"(type=(.+?)&(vid|file)=(.*?))[\|<]") 18 | 19 | def tucao_single_download(type_link, title, output_dir=".", merge=True, info_only=False): 20 | if "file" in type_link: 21 | url=type_link[type_link.find("file=")+5:] 22 | vtype, ext, size=url_info(url) 23 | print_info(site_info, title, vtype, size) 24 | if not info_only: 25 | download_urls([url], title, ext, size, output_dir) 26 | #fix for 189 video source, see raw list types 7 27 | elif "189" in type_link: 28 | vid = match1(type_link, r"vid=(\d+)") 29 | assert vid, "vid does not exist" 30 | url = "http://api.tucao.tv/api/down/{}".format(vid) 31 | vtype, ext, size=url_info(url) 32 | print_info(site_info, title, vtype, size) 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir) 35 | else: 36 | u="http://www.tucao.tv/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(type_link,random.getrandbits(28),int(time.time()*1000)) 37 | xml=minidom.parseString(get_content(u)) 38 | urls=[] 39 | size=0 40 | for i in xml.getElementsByTagName("url"): 41 | urls.append(i.firstChild.nodeValue) 42 | vtype, ext, _size=url_info(i.firstChild.nodeValue) 43 | size+=_size 44 | print_info(site_info, title, vtype, size) 45 | if not info_only: 46 | download_urls(urls, title, ext, size, output_dir) 47 |
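# Illustration only, not part of the original module: a sketch of how the
# playurl endpoint in the else-branch above is keyed -- the query carries a
# pseudo-random 28-bit value rendered as 7 hex digits plus a millisecond
# timestamp. The helper name and the default sample link are ours.
def _playurl_example(type_link="type=tudou&vid=199687639"):
    return "http://www.tucao.tv/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(
        type_link, random.getrandbits(28), int(time.time() * 1000))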
48 | def tucao_download(url, output_dir=".", merge=True, info_only=False, **kwargs): 49 | html=get_content(url) 50 | title=match1(html,r'<h1 class="show_title">(.*?)<\w') 51 | #fix for raw list that vid goes before type, see raw list types 6 52 | raw_list=match1(html,r"<li>\s*(type=.+?|vid=.+?)</li>") 53 | raw_l=raw_list.split("**") 54 | if len(raw_l)==1: 55 | format_link=raw_l[0][:-1] if raw_l[0].endswith("|") else raw_l[0] 56 | tucao_single_download(format_link,title,output_dir,merge,info_only) 57 | else: 58 | for i in raw_l: 59 | format_link,sub_title=i.split("|") 60 | tucao_single_download(format_link,title+"-"+sub_title,output_dir,merge,info_only) 61 | 62 | 63 | site_info = "tucao.tv" 64 | download = tucao_download 65 | download_playlist = playlist_not_supported("tucao") 66 | -------------------------------------------------------------------------------- /src/you_get/extractors/tudou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid'] 4 | 5 | from ..common import * 6 | from xml.dom.minidom import parseString 7 | import you_get.extractors.acfun 8 | 9 | def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): 10 | data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) 11 | temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x])) 12 | vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp]) 13 | 14 | urls = [] 15 | for vid in vids: 16 | for i in parseString(get_html('http://ct.v2.tudou.com/f?id=%s' % vid)).getElementsByTagName('f'): 17 | urls.append(i.firstChild.nodeValue.strip()) 18 | 19 | ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', urls[0]) 20 | 21 | print_info(site_info, title, ext, size) 22 | if not info_only: 23 | download_urls(urls, title, ext, size, output_dir=output_dir, merge = merge) 24 | 25 | def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False): 26 | html = get_html('http://www.tudou.com/programs/view/%s/' % id) 27 | 28 | iid = r1(r'iid\s*[:=]\s*(\S+)', html) 29 | try: 30 | title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") 31 | except AttributeError: 32 | title = '' 33 | tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) 34 | 35 | def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 36 | if 'acfun.tudou.com' in url: # acfun content on the tudou domain; delegate to the acfun extractor
37 | url = url.replace('acfun.tudou.com', 'www.acfun.tv') 38 | you_get.extractors.acfun.acfun_download(url, output_dir, 39 | merge, 40 | info_only) 41 | return #throw you back 42 | 43 | # Embedded player 44 | id = r1(r'http://www.tudou.com/v/([^/]+)/', url) 45 | if id: 46 | return tudou_download_by_id(id, title="", info_only=info_only) 47 | 48 | html = get_content(url) 49 | 50 | try: 51 | title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") 52 | assert title 53 | title = unescape_html(title) 54 | except AttributeError: 55 | title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"') 56 | if title is None: 57 | title = '' 58 | 59 | vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html) 60 | if vcode is None: 61 | vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"') 62 | if vcode: 63 | from .youku import youku_download_by_vid 64 | return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, src='tudou', **kwargs) 65 | 66 | iid = r1(r'iid\s*[:=]\s*(\d+)', html) 67 | if not iid: 68 | return tudou_download_playlist(url, output_dir, merge, info_only) 69 | 70 | tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) 71 | 72 | # obsolete? 73 | def parse_playlist(url): 74 | aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) 75 | html = get_decoded_html(url) 76 | if not aid: 77 | aid = r1(r"aid\s*[:=]\s*'(\d+)'", html) 78 | if re.match(r'http://www.tudou.com/albumcover/', url): 79 | atitle = r1(r"title\s*:\s*'([^']+)'", html) 80 | elif re.match(r'http://www.tudou.com/playlist/p/', url): 81 | atitle = r1(r'atitle\s*=\s*"([^"]+)"', html) 82 | else: 83 | raise NotImplementedError(url) 84 | assert aid 85 | assert atitle 86 | import json 87 | #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid 88 | url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid 89 | return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] 90 | 91 | def parse_plist(url): 92 | html = get_decoded_html(url) 93 | lcode = r1(r"lcode:\s*'([^']+)'", html) 94 | plist_info = json.loads(get_content('http://www.tudou.com/crp/plist.action?lcode=' + lcode)) 95 | return ([(item['kw'], item['iid']) for item in plist_info['items']]) 96 | 97 | def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): 98 | videos = parse_plist(url) 99 | for i, (title, id) in enumerate(videos): 100 | print('Processing %s of %s videos...' 
% (i + 1, len(videos))) 101 | tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) 102 | 103 | site_info = "Tudou.com" 104 | download = tudou_download 105 | download_playlist = tudou_download_playlist 106 | -------------------------------------------------------------------------------- /src/you_get/extractors/twitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['twitter_download'] 4 | 5 | from ..common import * 6 | from .universal import * 7 | 8 | def extract_m3u(source): 9 | r1 = get_content(source) 10 | s1 = re.findall(r'(/ext_tw_video/.*)', r1) 11 | s1 += re.findall(r'(/amplify_video/.*)', r1) 12 | r2 = get_content('https://video.twimg.com%s' % s1[-1]) 13 | s2 = re.findall(r'(/ext_tw_video/.*)', r2) 14 | s2 += re.findall(r'(/amplify_video/.*)', r2) 15 | return ['https://video.twimg.com%s' % i for i in s2] 16 | 17 | def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 18 | headers = { 19 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 20 | 'Accept-Encoding': 'gzip, deflate', 21 | 'Accept': '*/*' 22 | } 23 | 24 | if re.match(r'https?://pbs\.twimg\.com', url): 25 | universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) 26 | return 27 | 28 | if re.match(r'https?://mobile', url): # normalize mobile URL 29 | url = 'https://' + match1(url, r'//mobile\.(.+)') 30 | 31 | if re.match(r'https?://twitter\.com/i/moments/', url): # FIXME: moments 32 | html = get_html(url, faker=True) 33 | paths = re.findall(r'data-permalink-path="([^"]+)"', html) 34 | for path in paths: 35 | twitter_download('https://twitter.com' + path, 36 | output_dir=output_dir, 37 | merge=merge, 38 | info_only=info_only, 39 | **kwargs) 40 | return 41 | 42 | m = re.match(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url) 43 | assert m 44 | screen_name, item_id = m.group(3), m.group(4) 45 | page_title = "{} [{}]".format(screen_name, item_id) 46 | 47 | # FIXME: this API won't work for protected or nsfw contents 48 | api_url = 'https://cdn.syndication.twimg.com/tweet-result?id=%s&token=!' 
% item_id 49 | content = get_content(api_url) 50 | info = json.loads(content) 51 | 52 | author = info['user']['name'] 53 | url = 'https://twitter.com/%s/status/%s' % (info['user']['screen_name'], item_id) 54 | full_text = info['text'] 55 | 56 | if 'photos' in info: 57 | for photo in info['photos']: 58 | photo_url = photo['url'] 59 | title = item_id + '_' + photo_url.split('.')[-2].split('/')[-1] 60 | urls = [ photo_url + ':orig' ] 61 | size = urls_size(urls, headers=headers) 62 | ext = photo_url.split('.')[-1] 63 | 64 | print_info(site_info, title, ext, size) 65 | if not info_only: 66 | download_urls(urls, title, ext, size, output_dir, merge=merge) 67 | 68 | if 'video' in info: 69 | for mediaDetail in info['mediaDetails']: 70 | if 'video_info' not in mediaDetail: continue 71 | variants = mediaDetail['video_info']['variants'] 72 | variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0)) 73 | title = item_id + '_' + variants[-1]['url'].split('/')[-1].split('?')[0].split('.')[0] 74 | urls = [ variants[-1]['url'] ] 75 | size = urls_size(urls, headers=headers) 76 | mime, ext = variants[-1]['content_type'], 'mp4' 77 | 78 | print_info(site_info, title, ext, size) 79 | if not info_only: 80 | download_urls(urls, title, ext, size, output_dir, merge=merge, headers=headers) 81 | 82 | # TODO: should we deal with quoted tweets? 83 | 84 | 85 | site_info = "X.com" 86 | download = twitter_download 87 | download_playlist = playlist_not_supported('twitter') 88 | -------------------------------------------------------------------------------- /src/you_get/extractors/veoh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['veoh_download'] 4 | 5 | from ..common import * 6 | 7 | def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 8 | '''Get item_id''' 9 | if re.match(r'http://www.veoh.com/watch/\w+', url): 10 | item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') 11 | elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): 12 | item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') 13 | else: 14 | raise NotImplementedError('Cannot find item ID') 15 | veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs) 16 | 17 | #---------------------------------------------------------------------- 18 | def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): 19 | """Source: Android mobile""" 20 | webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) 21 | 22 | #grab download URL 23 | a = get_content(webpage_url, decoded=True) 24 | url = match1(a, r'(.[^>]+?)]+?)"', video_page) 12 | 13 | for quality in ['.1080.', '.720.', '.480.', '.360.', '.240.']: 14 | for source in sources: 15 | if source.find(quality) != -1: 16 | url = source 17 | break 18 | assert url 19 | type, ext, size = url_info(url) 20 | print_info(site_info, title, type, size) 21 | 22 | return url, title, ext, size 23 | 24 | 25 | def get_video_from_user_videolist(url): 26 | ep = 'https://vk.com/al_video.php' 27 | to_post = dict(act='show', al=1, module='direct', video=re.search(r'video(\d+_\d+)', url).group(1)) 28 | page = post_content(ep, post_data=to_post) 29 | video_pt = r'(.+?)', page).group(1) 32 | mime, ext, size = url_info(url) 33 | print_info(site_info, title, mime, size) 34 | 35 | return url, title, ext, size 36 | 37 | 38 | def get_image_info(url): 39 | image_page = get_content(url) 40 | # used 
for title - vk page owner 41 | page_of = re.findall(r'Sender:
    (.[^>]+?)(.[^>]+?)Download full size', image_page) 47 | type, ext, size = url_info(image_link) 48 | print_info(site_info, title, type, size) 49 | 50 | return image_link, title, ext, size 51 | 52 | 53 | def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): 54 | link = None 55 | if re.match(r'(.+)z\=video(.+)', url): 56 | link, title, ext, size = get_video_info(url) 57 | elif re.match(r'(.+)vk\.com\/photo(.+)', url): 58 | link, title, ext, size = get_image_info(url) 59 | elif re.search(r'vk\.com\/video\d+_\d+', url): 60 | link, title, ext, size = get_video_from_user_videolist(url) 61 | else: 62 | raise NotImplementedError('Nothing to download here') 63 | 64 | if not info_only and link is not None: 65 | download_urls([link], title, ext, size, output_dir, merge=merge) 66 | 67 | 68 | site_info = "VK.com" 69 | download = vk_download 70 | download_playlist = playlist_not_supported('vk') 71 | -------------------------------------------------------------------------------- /src/you_get/extractors/w56.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['w56_download', 'w56_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | from .sohu import sohu_download 8 | 9 | import json 10 | 11 | def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 12 | content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id)) 13 | info = content['info'] 14 | title = title or info['Subject'] 15 | assert title 16 | hd = info['hd'] 17 | assert hd in (0, 1, 2) 18 | hd_types = [['normal', 'qvga'], ['clear', 'vga'], ['super', 'wvga']][hd] 19 | files = [x for x in info['rfiles'] if x['type'] in hd_types] 20 | assert len(files) == 1 21 | size = int(files[0]['filesize']) 22 | url = files[0]['url'] + '&prod=56' 23 | ext = 'mp4' 24 | 25 | print_info(site_info, title, ext, size) 26 | if not info_only: 27 | download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) 28 | 29 | def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 30 | content = get_content(url) 31 | sohu_url = r1(r"url:\s*'([^']*)'", content) 32 | if sohu_url: 33 | sohu_download(sohu_url, output_dir, merge=merge, info_only=info_only, **kwargs) 34 | return 35 | 36 | id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) or \ 37 | r1(r'http://www.56.com/.*vid-(\w+).html', url) 38 | w56_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) 39 | 40 | site_info = "56.com" 41 | download = w56_download 42 | download_playlist = playlist_not_supported('56') 43 | -------------------------------------------------------------------------------- /src/you_get/extractors/ximalaya.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | import json 8 | import re 9 | 10 | stream_types = [ 11 | {'itag': '1', 'container': 'm4a', 'bitrate': 'default'}, 12 | {'itag': '2', 'container': 'm4a', 'bitrate': '32'}, 13 | {'itag': '3', 'container': 'm4a', 'bitrate': '64'} 14 | ] 15 | 16 | def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = False, stream_id = None): 17 | BASE_URL = 'http://www.ximalaya.com/tracks/' 18 | json_url = BASE_URL + id + '.json' 19 | json_data = json.loads(get_content(json_url, headers=fake_headers)) 20 | if 
'res' in json_data: 21 | if json_data['res'] == False: 22 | raise ValueError('Server reported id %s is invalid' % id) 23 | if 'is_paid' in json_data and json_data['is_paid']: 24 | if 'is_free' in json_data and not json_data['is_free']: 25 | raise ValueError('%s is paid item' % id) 26 | if (not title) and 'title' in json_data: 27 | title = json_data['title'] 28 | #no size data in the json. should it be calculated? 29 | size = 0 30 | url = json_data['play_path_64'] 31 | if stream_id: 32 | if stream_id == '1': 33 | url = json_data['play_path_32'] 34 | elif stream_id == '0': 35 | url = json_data['play_path'] 36 | logging.debug('ximalaya_download_by_id: %s' % url) 37 | ext = 'm4a' 38 | urls = [url] 39 | print('Site: %s' % site_info) 40 | print('title: %s' % title) 41 | if info_only: 42 | if stream_id: 43 | print_stream_info(stream_id) 44 | else: 45 | for item in range(0, len(stream_types)): 46 | print_stream_info(item) 47 | if not info_only: 48 | print('Type: MPEG-4 audio m4a') 49 | print('Size: N/A') 50 | download_urls(urls, title, ext, size, output_dir = output_dir, merge = False) 51 | 52 | def ximalaya_download(url, output_dir = '.', info_only = False, stream_id = None, **kwargs): 53 | if re.match(r'http://www\.ximalaya\.com/(\d+)/sound/(\d+)', url): 54 | id = match1(url, r'http://www\.ximalaya\.com/\d+/sound/(\d+)') 55 | else: 56 | raise NotImplementedError(url) 57 | ximalaya_download_by_id(id, output_dir = output_dir, info_only = info_only, stream_id = stream_id) 58 | 59 | def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, stream_id = None, **kwargs): 60 | if re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', playlist_url): 61 | page_content = get_content(playlist_url) 62 | pattern = re.compile(r'
<li sound_id="(\w+)"') 112 | 113 | #video_url = match1(html, r'
    ([\s\S]*)', html) 12 | total_size = 0 13 | part_urls= [] 14 | 15 | video_html = r1(r'', html) 16 | 17 | # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) 18 | video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) 19 | part_urls.append(video_url) 20 | ext = video_url.split('.')[-1] 21 | 22 | print_info(site_info, title, ext, total_size) 23 | if not info_only: 24 | download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge) 25 | 26 | 27 | def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if 'video.zhibo.tv' in url: 29 | zhibo_vedio_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 30 | return 31 | 32 | # if 'v.zhibo.tv' in url: 33 | # http://v.zhibo.tv/31609372 34 | html = get_html(url) 35 | title = r1(r'([\s\S]*)', html) 36 | is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", html) 37 | if is_live != "1": 38 | raise ValueError("The live stream is not online! (Errno:%s)" % is_live) 39 | 40 | match = re.search(r""" 41 | ourStreamName .*? 42 | '(.*?)' .*? 43 | rtmpHighSource .*? 44 | '(.*?)' .*? 45 | '(.*?)' 46 | """, html, re.S | re.X) 47 | real_url = match.group(3) + match.group(1) + match.group(2) 48 | 49 | print_info(site_info, title, 'flv', float('inf')) 50 | if not info_only: 51 | download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) 52 | 53 | site_info = "zhibo.tv" 54 | download = zhibo_download 55 | download_playlist = playlist_not_supported('zhibo') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/zhihu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['zhihu_download', 'zhihu_download_playlist'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | 9 | def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | paths = url.split("/") 11 | # question or column 12 | if len(paths) < 3 and len(paths) < 6: 13 | raise TypeError("URL does not conform to specifications, Support column and question only." 14 | "Example URL: https://zhuanlan.zhihu.com/p/51669862 or " 15 | "https://www.zhihu.com/question/267782048/answer/490720324") 16 | 17 | if ("question" not in paths or "answer" not in paths) and "zhuanlan.zhihu.com" not in paths: 18 | raise TypeError("URL does not conform to specifications, Support column and question only." 19 | "Example URL: https://zhuanlan.zhihu.com/p/51669862 or " 20 | "https://www.zhihu.com/question/267782048/answer/490720324") 21 | 22 | html = get_html(url, faker=True) 23 | title = match1(html, r'data-react-helmet="true">(.*?)') 24 | for index, video_id in enumerate(matchall(html, [r''): '-', 30 | ord('['): '(', 31 | ord(']'): ')', 32 | ord('\t'): ' ', 33 | }) 34 | else: 35 | # *nix 36 | if os == 'mac': 37 | # Mac OS HFS+ 38 | text = text.translate({ 39 | ord(':'): '-', 40 | }) 41 | 42 | # Remove leading . 
43 | if text.startswith("."): 44 | text = text[1:] 45 | 46 | text = text[:80] # Trim to 80 Unicode characters long 47 | return text 48 | -------------------------------------------------------------------------------- /src/you_get/util/git.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | from ..version import __version__ 6 | 7 | def get_head(repo_path): 8 | """Get (branch, commit) from HEAD of a git repo.""" 9 | try: 10 | ref = open(os.path.join(repo_path, '.git', 'HEAD'), 'r').read().strip()[5:].split('/') 11 | branch = ref[-1] 12 | commit = open(os.path.join(repo_path, '.git', *ref), 'r').read().strip()[:7] 13 | return branch, commit 14 | except: 15 | return None 16 | 17 | def get_version(repo_path): 18 | try: 19 | version = __version__.split('.') 20 | major, minor, cn = [int(i) for i in version] 21 | p = subprocess.Popen(['git', 22 | '--git-dir', os.path.join(repo_path, '.git'), 23 | '--work-tree', repo_path, 24 | 'rev-list', 'HEAD', '--count'], 25 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 26 | raw, err = p.communicate() 27 | c_head = int(raw.decode('ascii')) 28 | q = subprocess.Popen(['git', 29 | '--git-dir', os.path.join(repo_path, '.git'), 30 | '--work-tree', repo_path, 31 | 'rev-list', 'master', '--count'], 32 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 33 | raw, err = q.communicate() 34 | c_master = int(raw.decode('ascii')) 35 | cc = c_head - c_master 36 | assert cc 37 | return '%s.%s.%s' % (major, minor, cn + cc) 38 | except: 39 | return __version__ 40 | -------------------------------------------------------------------------------- /src/you_get/util/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 
3 | 4 | from ..version import script_name 5 | 6 | import os, sys 7 | 8 | TERM = os.getenv('TERM', '') 9 | IS_ANSI_TERMINAL = TERM in ( 10 | 'eterm-color', 11 | 'linux', 12 | 'screen', 13 | 'vt100', 14 | ) or TERM.startswith('xterm') 15 | 16 | # ANSI escape code 17 | # See 18 | RESET = 0 19 | BOLD = 1 20 | UNDERLINE = 4 21 | NEGATIVE = 7 22 | NO_BOLD = 21 23 | NO_UNDERLINE = 24 24 | POSITIVE = 27 25 | BLACK = 30 26 | RED = 31 27 | GREEN = 32 28 | YELLOW = 33 29 | BLUE = 34 30 | MAGENTA = 35 31 | CYAN = 36 32 | LIGHT_GRAY = 37 33 | DEFAULT = 39 34 | BLACK_BACKGROUND = 40 35 | RED_BACKGROUND = 41 36 | GREEN_BACKGROUND = 42 37 | YELLOW_BACKGROUND = 43 38 | BLUE_BACKGROUND = 44 39 | MAGENTA_BACKGROUND = 45 40 | CYAN_BACKGROUND = 46 41 | LIGHT_GRAY_BACKGROUND = 47 42 | DEFAULT_BACKGROUND = 49 43 | DARK_GRAY = 90 # xterm 44 | LIGHT_RED = 91 # xterm 45 | LIGHT_GREEN = 92 # xterm 46 | LIGHT_YELLOW = 93 # xterm 47 | LIGHT_BLUE = 94 # xterm 48 | LIGHT_MAGENTA = 95 # xterm 49 | LIGHT_CYAN = 96 # xterm 50 | WHITE = 97 # xterm 51 | DARK_GRAY_BACKGROUND = 100 # xterm 52 | LIGHT_RED_BACKGROUND = 101 # xterm 53 | LIGHT_GREEN_BACKGROUND = 102 # xterm 54 | LIGHT_YELLOW_BACKGROUND = 103 # xterm 55 | LIGHT_BLUE_BACKGROUND = 104 # xterm 56 | LIGHT_MAGENTA_BACKGROUND = 105 # xterm 57 | LIGHT_CYAN_BACKGROUND = 106 # xterm 58 | WHITE_BACKGROUND = 107 # xterm 59 | 60 | def sprint(text, *colors): 61 | """Format text with color or other effects into ANSI escaped string.""" 62 | return "\33[{}m{content}\33[{}m".format(";".join([str(color) for color in colors]), RESET, content=text) if IS_ANSI_TERMINAL and colors else text 63 | 64 | def println(text, *colors): 65 | """Print text to standard output.""" 66 | sys.stdout.write(sprint(text, *colors) + "\n") 67 | 68 | def print_err(text, *colors): 69 | """Print text to standard error.""" 70 | sys.stderr.write(sprint(text, *colors) + "\n") 71 | 72 | def print_log(text, *colors): 73 | """Print a log message to standard error.""" 74 | sys.stderr.write(sprint("{}: {}".format(script_name, text), *colors) + "\n") 75 | 76 | def i(message): 77 | """Print a normal log message.""" 78 | print_log(message) 79 | 80 | def d(message): 81 | """Print a debug log message.""" 82 | print_log(message, BLUE) 83 | 84 | def w(message): 85 | """Print a warning log message.""" 86 | print_log(message, YELLOW) 87 | 88 | def e(message, exit_code=None): 89 | """Print an error log message.""" 90 | print_log(message, YELLOW, BOLD) 91 | if exit_code is not None: 92 | sys.exit(exit_code) 93 | 94 | def wtf(message, exit_code=1): 95 | """What a Terrible Failure!""" 96 | print_log(message, RED, BOLD) 97 | if exit_code is not None: 98 | sys.exit(exit_code) 99 | 100 | def yes_or_no(message): 101 | ans = str(input('%s (y/N) ' % message)).lower().strip() 102 | return ans == 'y' 103 | -------------------------------------------------------------------------------- /src/you_get/util/os.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from platform import system 4 | 5 | def detect_os(): 6 | """Detect operating system. 
7 | """ 8 | 9 | # Inspired by: 10 | # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py 11 | 12 | syst = system().lower() 13 | os = 'unknown' 14 | 15 | if 'cygwin' in syst: 16 | os = 'cygwin' 17 | elif 'darwin' in syst: 18 | os = 'mac' 19 | elif 'linux' in syst: 20 | os = 'linux' 21 | # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 22 | try: 23 | with open('/proc/version', 'r') as f: 24 | if 'microsoft' in f.read().lower(): 25 | os = 'wsl' 26 | except: pass 27 | elif 'windows' in syst: 28 | os = 'windows' 29 | elif 'bsd' in syst: 30 | os = 'bsd' 31 | 32 | return os 33 | -------------------------------------------------------------------------------- /src/you_get/util/strings.py: -------------------------------------------------------------------------------- 1 | try: 2 | # py 3.4 3 | from html import unescape as unescape_html 4 | except ImportError: 5 | import re 6 | from html.entities import entitydefs 7 | 8 | def unescape_html(string): 9 | '''HTML entity decode''' 10 | string = re.sub(r'&#[^;]+;', _sharp2uni, string) 11 | string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string) 12 | return string 13 | 14 | def _sharp2uni(m): 15 | '''&#...; ==> unicode''' 16 | s = m.group(0)[2:].rstrip(';;') 17 | if s.startswith('x'): 18 | return chr(int('0'+s, 16)) 19 | else: 20 | return chr(int(s)) 21 | 22 | from .fs import legitimize 23 | 24 | def get_filename(htmlstring): 25 | return legitimize(unescape_html(htmlstring)) 26 | 27 | def parameterize(string): 28 | return "'%s'" % string.replace("'", r"'\''") 29 | -------------------------------------------------------------------------------- /src/you_get/util/term.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def get_terminal_size(): 4 | """Get (width, height) of the current terminal.""" 5 | try: 6 | import fcntl, termios, struct # fcntl module only available on Unix 7 | return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) 8 | except: 9 | return (40, 80) 10 | -------------------------------------------------------------------------------- /src/you_get/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | script_name = 'you-get' 4 | __version__ = '0.4.1743' 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/tests/__init__.py -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.extractors import ( 6 | imgur, 7 | magisto, 8 | youtube, 9 | missevan, 10 | acfun, 11 | bilibili, 12 | soundcloud, 13 | tiktok, 14 | twitter, 15 | miaopai 16 | ) 17 | 18 | 19 | class YouGetTests(unittest.TestCase): 20 | def test_imgur(self): 21 | imgur.download('http://imgur.com/WVLk5nD', info_only=True) 22 | imgur.download('https://imgur.com/we-should-have-listened-WVLk5nD', info_only=True) 23 | 24 | def test_magisto(self): 25 | magisto.download( 26 | 'http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA', 27 | info_only=True 28 | ) 29 | 30 | #def test_youtube(self): 31 | #youtube.download( 32 | 
# 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True 33 | #) 34 | #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) 35 | #youtube.download( 36 | # 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa 37 | # info_only=True 38 | #) 39 | #youtube.download( 40 | # 'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True 41 | #) 42 | 43 | def test_acfun(self): 44 | acfun.download('https://www.acfun.cn/v/ac44560432', info_only=True) 45 | 46 | #def test_bilibili(self): 47 | #bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True) 48 | 49 | #def test_soundcloud(self): 50 | ## single song 51 | #soundcloud.download( 52 | # 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True 53 | #) 54 | ## playlist 55 | #soundcloud.download( 56 | # 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True 57 | #) 58 | 59 | def test_tiktok(self): 60 | tiktok.download('https://www.tiktok.com/@zukky_48/video/7398162058153315605', info_only=True) 61 | tiktok.download('https://www.tiktok.com/@/video/7398162058153315605', info_only=True) 62 | tiktok.download('https://t.tiktok.com/i18n/share/video/7398162058153315605/', info_only=True) 63 | tiktok.download('https://vt.tiktok.com/ZSYKjKt6M/', info_only=True) 64 | 65 | def test_twitter(self): 66 | twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True) 67 | twitter.download('https://x.com/elonmusk/status/1530516552084234244', info_only=True) 68 | 69 | def test_weibo(self): 70 | miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True) 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.common import * 6 | 7 | class TestCommon(unittest.TestCase): 8 | 9 | def test_match1(self): 10 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') 11 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) 12 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.util.fs import * 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_legitimize(self): 9 | self.assertEqual(legitimize("1*2", os="linux"), "1*2") 10 | self.assertEqual(legitimize("1*2", os="mac"), "1*2") 11 | self.assertEqual(legitimize("1*2", os="windows"), "1-2") 12 | self.assertEqual(legitimize("1*2", os="wsl"), "1-2") 13 | -------------------------------------------------------------------------------- /you-get: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys 3 | 4 | _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) 5 | _filepath = os.path.dirname(sys.argv[0]) 6 | sys.path.insert(1, os.path.join(_filepath, _srcdir)) 7 | 8 | if sys.version_info[0] == 3: 9 | import you_get 10 | if __name__ == '__main__': 11 | you_get.main(repo_path=_filepath) 12 | else: # Python 2 13 | from you_get.util import log 14 | log.e("[fatal] Python 3 is required!") 15 | log.wtf("try to run this script using 'python3 
you-get'.") 16 | -------------------------------------------------------------------------------- /you-get.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "you-get", 3 | "author": "Mort Yao", 4 | "author_email": "mort.yao@gmail.com", 5 | "url": "https://you-get.org/", 6 | "license": "MIT", 7 | 8 | "description": "Dumb downloader that scrapes the web", 9 | "keywords": "video download youtube youku niconico", 10 | 11 | "classifiers": [ 12 | "Development Status :: 4 - Beta", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: End Users/Desktop", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.7", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Topic :: Internet", 28 | "Topic :: Internet :: WWW/HTTP", 29 | "Topic :: Multimedia", 30 | "Topic :: Multimedia :: Graphics", 31 | "Topic :: Multimedia :: Sound/Audio", 32 | "Topic :: Multimedia :: Video", 33 | "Topic :: Utilities" 34 | ], 35 | 36 | "console_scripts": [ 37 | "you-get = you_get.__main__:main" 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /you-get.plugin.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | alias you-get="noglob python3 $(dirname $0)/you-get" 3 | alias you-vlc="noglob python3 $(dirname $0)/you-get --player vlc" 4 | --------------------------------------------------------------------------------