├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── CHANGELOG.rst ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── README.rst ├── SECURITY.md ├── contrib └── completion │ ├── _you-get │ ├── you-get-completion.bash │ └── you-get.fish ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── you_get │ ├── __init__.py │ ├── __main__.py │ ├── cli_wrapper │ ├── __init__.py │ ├── downloader │ │ └── __init__.py │ ├── openssl │ │ └── __init__.py │ ├── player │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── dragonplayer.py │ │ ├── gnome_mplayer.py │ │ ├── mplayer.py │ │ ├── vlc.py │ │ └── wmp.py │ └── transcoder │ │ ├── __init__.py │ │ ├── ffmpeg.py │ │ ├── libav.py │ │ └── mencoder.py │ ├── common.py │ ├── extractor.py │ ├── extractors │ ├── __init__.py │ ├── acfun.py │ ├── alive.py │ ├── archive.py │ ├── baidu.py │ ├── bandcamp.py │ ├── baomihua.py │ ├── bigthink.py │ ├── bilibili.py │ ├── bokecc.py │ ├── cbs.py │ ├── ckplayer.py │ ├── cntv.py │ ├── coub.py │ ├── dailymotion.py │ ├── douban.py │ ├── douyin.py │ ├── douyutv.py │ ├── ehow.py │ ├── embed.py │ ├── facebook.py │ ├── fc2video.py │ ├── flickr.py │ ├── freesound.py │ ├── funshion.py │ ├── giphy.py │ ├── google.py │ ├── heavymusic.py │ ├── huomaotv.py │ ├── icourses.py │ ├── ifeng.py │ ├── imgur.py │ ├── infoq.py │ ├── instagram.py │ ├── interest.py │ ├── iqilu.py │ ├── iqiyi.py │ ├── iwara.py │ ├── ixigua.py │ ├── joy.py │ ├── kakao.py │ ├── khan.py │ ├── ku6.py │ ├── kuaishou.py │ ├── kugou.py │ ├── kuwo.py │ ├── le.py │ ├── lizhi.py │ ├── longzhu.py │ ├── lrts.py │ ├── magisto.py │ ├── metacafe.py │ ├── mgtv.py │ ├── miaopai.py │ ├── miomio.py │ ├── missevan.py │ ├── mixcloud.py │ ├── mtv81.py │ ├── nanagogo.py │ ├── naver.py │ ├── netease.py │ ├── nicovideo.py │ ├── pinterest.py │ ├── pixnet.py │ ├── pptv.py │ ├── qie.py │ ├── qie_video.py │ ├── qingting.py │ ├── qq.py │ ├── qq_egame.py │ ├── showroom.py │ ├── sina.py │ ├── sohu.py │ ├── soundcloud.py │ ├── suntv.py │ ├── ted.py │ ├── theplatform.py │ ├── tiktok.py │ ├── toutiao.py │ ├── tucao.py │ ├── tudou.py │ ├── tumblr.py │ ├── twitter.py │ ├── ucas.py │ ├── universal.py │ ├── veoh.py │ ├── vimeo.py │ ├── vk.py │ ├── w56.py │ ├── wanmen.py │ ├── ximalaya.py │ ├── xinpianchang.py │ ├── yixia.py │ ├── yizhibo.py │ ├── youku.py │ ├── youtube.py │ ├── zhanqi.py │ ├── zhibo.py │ └── zhihu.py │ ├── json_output.py │ ├── processor │ ├── __init__.py │ ├── ffmpeg.py │ ├── join_flv.py │ ├── join_mp4.py │ ├── join_ts.py │ └── rtmpdump.py │ ├── util │ ├── __init__.py │ ├── fs.py │ ├── git.py │ ├── log.py │ ├── os.py │ ├── strings.py │ └── term.py │ └── version.py ├── tests ├── __init__.py ├── test.py ├── test_common.py └── test_util.py ├── you-get ├── you-get.json └── you-get.plugin.zsh /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | 3 | name: develop 4 | 5 | on: 6 | push: 7 | branches: [ develop ] 8 | pull_request: 9 | branches: [ develop ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9, '3.10', '3.11', '3.12', '3.13', pypy-3.8, pypy-3.9, pypy-3.10] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: 
Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip setuptools 29 | pip install flake8 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --ignore=F824 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Test with unittest 38 | run: | 39 | make test 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Misc 62 | _* 63 | *_ 64 | *.3gp 65 | *.asf 66 | *.download 67 | *.f4v 68 | *.flv 69 | *.gif 70 | *.html 71 | *.jpg 72 | *.lrc 73 | *.mkv 74 | *.mp3 75 | *.mp4 76 | *.mpg 77 | *.png 78 | *.srt 79 | *.ts 80 | *.webm 81 | *.xml 82 | *.json 83 | /.env 84 | /.idea 85 | *.m4a 86 | *.DS_Store 87 | *.txt 88 | *.sw[a-p] 89 | 90 | *.zip 91 | 92 | .emacs* 93 | .vscode 94 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Report an Issue 2 | 3 | If you would like to report a problem you find when using `you-get`, please open a [Pull Request](https://github.com/soimort/you-get/pulls), which should include: 4 | 5 | 1. A detailed description of the encountered problem; 6 | 2. At least one commit, addressing the problem through some unit test(s). 7 | * Examples of good commits: [#2675](https://github.com/soimort/you-get/pull/2675/files), [#2680](https://github.com/soimort/you-get/pull/2680/files), [#2685](https://github.com/soimort/you-get/pull/2685/files) 8 | 9 | PRs that fail to meet the above criteria may be closed summarily with no further action. 10 | 11 | A valid PR will remain open until its addressed problem is fixed. 12 | 13 | 14 | 15 | # 如何汇报问题 16 | 17 | 为了防止对 GitHub Issues 的滥用,本项目不接受一般的 Issue。 18 | 19 | 如您在使用 `you-get` 的过程中发现任何问题,请开启一个 [Pull Request](https://github.com/soimort/you-get/pulls)。该 PR 应当包含: 20 | 21 | 1. 详细的问题描述; 22 | 2. 
至少一个 commit,其内容是**与问题相关的**单元测试。**不要通过随意修改无关文件的方式来提交 PR!** 23 | * 有效的 commit 示例:[#2675](https://github.com/soimort/you-get/pull/2675/files), [#2680](https://github.com/soimort/you-get/pull/2680/files), [#2685](https://github.com/soimort/you-get/pull/2685/files) 24 | 25 | 不符合以上条件的 PR 可能被直接关闭。 26 | 27 | 有效的 PR 将会被一直保留,直至相应的问题得以修复。 28 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2012-2024 Mort Yao and other contributors 4 | (https://github.com/soimort/you-get/graphs/contributors) 5 | Copyright (c) 2012 Boyu Guo 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include *.txt 3 | include Makefile 4 | include CONTRIBUTING.md 5 | include README.md 6 | include you-get 7 | include you-get.json 8 | include you-get.plugin.zsh 9 | recursive-include contrib * 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default i test clean all html rst build install release 2 | 3 | default: i 4 | 5 | i: 6 | @(cd src; python -i -c 'import you_get; print("You-Get %s\n>>> import you_get" % you_get.version.__version__)') 7 | 8 | test: 9 | (cd src; python -m unittest discover -s ../tests) 10 | 11 | clean: 12 | zenity --question 13 | rm -fr build/ dist/ src/*.egg-info/ 14 | find . | grep __pycache__ | xargs rm -fr 15 | find . | grep .pyc | xargs rm -f 16 | 17 | all: build 18 | 19 | html: 20 | pandoc README.md > README.html 21 | 22 | rst: 23 | pandoc -s -t rst README.md > README.rst 24 | 25 | build: 26 | python -m build 27 | 28 | install: 29 | python -m pip install . 
30 | 31 | release: build 32 | @echo 'Upload new version to PyPI using:' 33 | @echo ' twine upload --sign dist/you_get-VERSION*' 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | You-Get 2 | ======= 3 | 4 | |PyPI version| |Build Status| |Gitter| 5 | 6 | `You-Get `__ is a tiny command-line utility to 7 | download media contents (videos, audios, images) from the Web, in case 8 | there is no other handy way to do it. 9 | 10 | Here's how you use ``you-get`` to download a video from `this web 11 | page `__: 12 | 13 | .. code:: console 14 | 15 | $ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society 16 | Site: fsf.org 17 | Title: TEDxGE2014_Stallman05_LQ 18 | Type: WebM video (video/webm) 19 | Size: 27.12 MiB (28435804 Bytes) 20 | 21 | Downloading TEDxGE2014_Stallman05_LQ.webm ... 22 | 100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1] 12 MB/s 23 | 24 | And here's why you might want to use it: 25 | 26 | - You enjoyed something on the Internet, and just want to download them 27 | for your own pleasure. 28 | - You watch your favorite videos online from your computer, but you are 29 | prohibited from saving them. You feel that you have no control over 30 | your own computer. (And it's not how an open Web is supposed to 31 | work.) 32 | - You want to get rid of any closed-source technology or proprietary 33 | JavaScript code, and disallow things like Flash running on your 34 | computer. 35 | - You are an adherent of hacker culture and free software. 36 | 37 | What ``you-get`` can do for you: 38 | 39 | - Download videos / audios from popular websites such as YouTube, 40 | Youku, Niconico, and a bunch more. (See the `full list of supported 41 | sites <#supported-sites>`__) 42 | - Stream an online video in your media player. No web browser, no more 43 | ads. 44 | - Download images (of interest) by scraping a web page. 45 | - Download arbitrary non-HTML contents, i.e., binary files. 46 | 47 | Interested? `Install it <#installation>`__ now and `get started by 48 | examples <#getting-started>`__. 49 | 50 | Are you a Python programmer? Then check out `the 51 | source `__ and fork it! 52 | 53 | .. |PyPI version| image:: https://badge.fury.io/py/you-get.png 54 | :target: http://badge.fury.io/py/you-get 55 | .. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg 56 | :target: https://github.com/soimort/you-get/actions 57 | .. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg 58 | :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 59 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | Please report security issues to . 6 | -------------------------------------------------------------------------------- /contrib/completion/_you-get: -------------------------------------------------------------------------------- 1 | #compdef you-get 2 | 3 | # Zsh completion definition for soimort/you-get. 
4 | 5 | setopt localoptions noshwordsplit noksharrays 6 | local -a args 7 | 8 | args=( 9 | '(- : *)'{-V,--version}'[print version and exit]' 10 | '(- : *)'{-h,--help}'[print help and exit]' 11 | '(-i --info)'{-i,--info}'[print extracted information]' 12 | '(-u --url)'{-u,--url}'[print extracted information with URLs]' 13 | '(--json)--json[print extracted URLs in JSON format]' 14 | '(-n --no-merge)'{-n,--no-merge}'[do not merge video parts]' 15 | '(--no-caption)--no-caption[do not download captions]' 16 | '(-f --force)'{-f,--force}'[force overwrite existing files]' 17 | '(-F --format)'{-F,--format}'[set video format to the specified stream id]:stream id' 18 | '(-O --output-filename)'{-O,--output-filename}'[set output filename]:filename:_files' 19 | '(-o --output-dir)'{-o,--output-dir}'[set output directory]:directory:_files -/' 20 | '(-p --player)'{-p,--player}'[stream extracted URL to the specified player]:player and options' 21 | '(-c --cookies)'{-c,--cookies}'[load cookies.txt or cookies.sqlite]:cookies file:_files' 22 | '(-x --http-proxy)'{-x,--http-proxy}'[use the specified HTTP proxy for downloading]:host\:port:' 23 | '(-y --extractor-proxy)'{-y,--extractor-proxy}'[use the specified HTTP proxy for extraction only]:host\:port' 24 | '(--no-proxy)--no-proxy[do not use a proxy]' 25 | '(-t --timeout)'{-t,--timeout}'[set socket timeout]:seconds' 26 | '(-d --debug)'{-d,--debug}'[show traceback and other debug info]' 27 | '*: :_guard "^-*" url' 28 | ) 29 | _arguments -S -s $args 30 | -------------------------------------------------------------------------------- /contrib/completion/you-get-completion.bash: -------------------------------------------------------------------------------- 1 | # Bash completion definition for you-get. 2 | 3 | _you-get () { 4 | COMPREPLY=() 5 | local IFS=$' \n' 6 | local cur=$2 prev=$3 7 | local -a opts_without_arg opts_with_arg 8 | opts_without_arg=( 9 | -V --version -h --help -i --info -u --url --json -n --no-merge 10 | --no-caption -f --force --no-proxy -d --debug 11 | ) 12 | opts_with_arg=( 13 | -F --format -O --output-filename -o --output-dir -p --player 14 | -c --cookies -x --http-proxy -y --extractor-proxy -t --timeout 15 | ) 16 | 17 | # Do not complete non option names 18 | [[ $cur == -* ]] || return 1 19 | 20 | # Do not complete when the previous arg is an option expecting an argument 21 | for opt in "${opts_with_arg[@]}"; do 22 | [[ $opt == $prev ]] && return 1 23 | done 24 | 25 | # Complete option names 26 | COMPREPLY=( $(compgen -W "${opts_without_arg[*]} ${opts_with_arg[*]}" \ 27 | -- "$cur") ) 28 | return 0 29 | } 30 | 31 | complete -F _you-get you-get 32 | -------------------------------------------------------------------------------- /contrib/completion/you-get.fish: -------------------------------------------------------------------------------- 1 | # Fish completion definition for you-get. 
2 | 3 | complete -c you-get -s V -l version -d 'print version and exit' 4 | complete -c you-get -s h -l help -d 'print help and exit' 5 | complete -c you-get -s i -l info -d 'print extracted information' 6 | complete -c you-get -s u -l url -d 'print extracted information' 7 | complete -c you-get -l json -d 'print extracted URLs in JSON format' 8 | complete -c you-get -s n -l no-merge -d 'do not merge video parts' 9 | complete -c you-get -l no-caption -d 'do not download captions' 10 | complete -c you-get -s f -l force -d 'force overwrite existing files' 11 | complete -c you-get -s F -l format -x -d 'set video format to the specified stream id' 12 | complete -c you-get -s O -l output-filename -d 'set output filename' \ 13 | -x -a '(__fish_complete_path (commandline -ct) "output filename")' 14 | complete -c you-get -s o -l output-dir -d 'set output directory' \ 15 | -x -a '(__fish_complete_directories (commandline -ct) "output directory")' 16 | complete -c you-get -s p -l player -x -d 'stream extracted URL to the specified player' 17 | complete -c you-get -s c -l cookies -d 'load cookies.txt or cookies.sqlite' \ 18 | -x -a '(__fish_complete_path (commandline -ct) "cookies.txt or cookies.sqlite")' 19 | complete -c you-get -s x -l http-proxy -x -d 'use the specified HTTP proxy for downloading' 20 | complete -c you-get -s y -l extractor-proxy -x -d 'use the specified HTTP proxy for extraction only' 21 | complete -c you-get -l no-proxy -d 'do not use a proxy' 22 | complete -c you-get -s t -l timeout -x -d 'set socket timeout' 23 | complete -c you-get -s d -l debug -d 'show traceback and other debug info' 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # runtime dependencies 2 | dukpy 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | PROJ_NAME = 'you-get' 4 | PACKAGE_NAME = 'you_get' 5 | 6 | PROJ_METADATA = '%s.json' % PROJ_NAME 7 | 8 | import importlib.util 9 | import importlib.machinery 10 | 11 | def load_source(modname, filename): 12 | loader = importlib.machinery.SourceFileLoader(modname, filename) 13 | spec = importlib.util.spec_from_file_location(modname, filename, loader=loader) 14 | module = importlib.util.module_from_spec(spec) 15 | # The module is always executed and not cached in sys.modules. 16 | # Uncomment the following line to cache the module. 
17 | # sys.modules[module.__name__] = module 18 | loader.exec_module(module) 19 | return module 20 | 21 | import os, json 22 | here = os.path.abspath(os.path.dirname(__file__)) 23 | proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) 24 | try: 25 | README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() 26 | except: 27 | README = "" 28 | CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() 29 | VERSION = load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ 30 | 31 | from setuptools import setup, find_packages 32 | setup( 33 | name = proj_info['name'], 34 | version = VERSION, 35 | 36 | author = proj_info['author'], 37 | author_email = proj_info['author_email'], 38 | url = proj_info['url'], 39 | license = proj_info['license'], 40 | 41 | description = proj_info['description'], 42 | keywords = proj_info['keywords'], 43 | 44 | long_description = README, 45 | 46 | packages = find_packages('src'), 47 | package_dir = {'' : 'src'}, 48 | 49 | test_suite = 'tests', 50 | 51 | platforms = 'any', 52 | zip_safe = True, 53 | include_package_data = True, 54 | 55 | classifiers = proj_info['classifiers'], 56 | 57 | entry_points = {'console_scripts': proj_info['console_scripts']}, 58 | 59 | install_requires = ['dukpy'], 60 | extras_require = { 61 | 'socks': ['PySocks'], 62 | } 63 | ) 64 | -------------------------------------------------------------------------------- /src/you_get/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 3 | 4 | import sys 5 | 6 | if sys.version_info[0] == 3: 7 | #from .extractor import Extractor, VideoExtractor 8 | #from .util import log 9 | 10 | from .__main__ import * 11 | 12 | #from .common import * 13 | #from .version import * 14 | #from .cli_wrapper import * 15 | #from .extractor import * 16 | else: 17 | # Don't import anything. 18 | pass 19 | -------------------------------------------------------------------------------- /src/you_get/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import getopt 4 | import os 5 | import platform 6 | import sys 7 | from .version import script_name, __version__ 8 | from .util import git, log 9 | 10 | _options = [ 11 | 'help', 12 | 'version', 13 | 'gui', 14 | 'force', 15 | 'playlists', 16 | ] 17 | _short_options = 'hVgfl' 18 | 19 | _help = """Usage: {} [OPTION]... [URL]... 20 | TODO 21 | """.format(script_name) 22 | 23 | # TBD 24 | def main_dev(**kwargs): 25 | """Main entry point. 26 | you-get-dev 27 | """ 28 | 29 | # Get (branch, commit) if running from a git repo. 30 | head = git.get_head(kwargs['repo_path']) 31 | 32 | # Get options and arguments. 33 | try: 34 | opts, args = getopt.getopt(sys.argv[1:], _short_options, _options) 35 | except getopt.GetoptError as e: 36 | log.wtf(""" 37 | [Fatal] {}. 38 | Try '{} --help' for more options.""".format(e, script_name)) 39 | 40 | if not opts and not args: 41 | # Display help. 42 | print(_help) 43 | # Enter GUI mode. 44 | #from .gui import gui_main 45 | #gui_main() 46 | else: 47 | conf = {} 48 | for opt, arg in opts: 49 | if opt in ('-h', '--help'): 50 | # Display help. 51 | print(_help) 52 | 53 | elif opt in ('-V', '--version'): 54 | # Display version. 
55 | log.println("you-get:", log.BOLD) 56 | log.println(" version: {}".format(__version__)) 57 | if head is not None: 58 | log.println(" branch: {}\n commit: {}".format(*head)) 59 | else: 60 | log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__))) 61 | 62 | log.println(" platform: {}".format(platform.platform())) 63 | log.println(" python: {}".format(sys.version.split('\n')[0])) 64 | 65 | elif opt in ('-g', '--gui'): 66 | # Run using GUI. 67 | conf['gui'] = True 68 | 69 | elif opt in ('-f', '--force'): 70 | # Force download. 71 | conf['force'] = True 72 | 73 | elif opt in ('-l', '--playlist', '--playlists'): 74 | # Download playlist whenever possible. 75 | conf['playlist'] = True 76 | 77 | if args: 78 | if 'gui' in conf and conf['gui']: 79 | # Enter GUI mode. 80 | from .gui import gui_main 81 | gui_main(*args, **conf) 82 | else: 83 | # Enter console mode. 84 | from .console import console_main 85 | console_main(*args, **conf) 86 | 87 | def main(**kwargs): 88 | """Main entry point. 89 | you-get (legacy) 90 | """ 91 | from .common import main 92 | main(**kwargs) 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/downloader/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/openssl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/openssl/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .mplayer import * 4 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' WIP 4 | def main(): 5 | script_main('you-get', any_download, any_download_playlist) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | ''' 10 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/dragonplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/dragonplayer.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/gnome_mplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/gnome_mplayer.py 
-------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/mplayer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/mplayer.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/vlc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/player/wmp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/player/wmp.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/__init__.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/ffmpeg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/ffmpeg.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/libav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/libav.py -------------------------------------------------------------------------------- /src/you_get/cli_wrapper/transcoder/mencoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/src/you_get/cli_wrapper/transcoder/mencoder.py -------------------------------------------------------------------------------- /src/you_get/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .acfun import * 4 | from .alive import * 5 | from .archive import * 6 | from .baidu import * 7 | from .bandcamp import * 8 | from .bigthink import * 9 | from .bilibili import * 10 | from .bokecc import * 11 | from .cbs import * 12 | from .ckplayer import * 13 | from .cntv import * 14 | from .coub import * 15 | from .dailymotion import * 16 | from .douban import * 17 | from .douyin import * 18 | from .douyutv import * 19 | from .ehow import * 20 | from .facebook import * 21 | from .fc2video import * 22 | from .flickr import * 23 | from .freesound import * 24 | from .funshion import * 25 | from .google import * 26 | from .heavymusic import * 27 | from .icourses import * 28 | from .ifeng import * 29 | from .imgur import * 30 | from .infoq import * 31 | from .instagram import * 32 | from .interest import * 33 | from .iqilu import * 34 | from .iqiyi import * 35 | from .joy import * 36 | from .khan import * 37 | from .ku6 import * 38 | from .kakao import * 39 | from .kuaishou import * 40 | from .kugou import * 41 | 
from .kuwo import * 42 | from .le import * 43 | from .lizhi import * 44 | from .longzhu import * 45 | from .magisto import * 46 | from .metacafe import * 47 | from .mgtv import * 48 | from .miaopai import * 49 | from .miomio import * 50 | from .mixcloud import * 51 | from .mtv81 import * 52 | from .nanagogo import * 53 | from .naver import * 54 | from .netease import * 55 | from .nicovideo import * 56 | from .pinterest import * 57 | from .pixnet import * 58 | from .pptv import * 59 | from .qie import * 60 | from .qingting import * 61 | from .qq import * 62 | from .showroom import * 63 | from .sina import * 64 | from .sohu import * 65 | from .soundcloud import * 66 | from .suntv import * 67 | from .ted import * 68 | from .theplatform import * 69 | from .tiktok import * 70 | from .tucao import * 71 | from .tudou import * 72 | from .tumblr import * 73 | from .twitter import * 74 | from .ucas import * 75 | from .veoh import * 76 | from .vimeo import * 77 | from .vk import * 78 | from .w56 import * 79 | from .wanmen import * 80 | from .xinpianchang import * 81 | from .yixia import * 82 | from .youku import * 83 | from .youtube import * 84 | from .zhanqi import * 85 | from .zhibo import * 86 | from .zhihu import * 87 | -------------------------------------------------------------------------------- /src/you_get/extractors/alive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['alive_download'] 4 | 5 | from ..common import * 6 | 7 | def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | html = get_html(url) 9 | 10 | title = r1(r'(.*)', html) 37 | assert title 38 | id = r1(r'flvid\s*=\s*(\d+)', html) 39 | assert id 40 | baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) 41 | 42 | site_info = "baomihua.com" 43 | download = baomihua_download 44 | download_playlist = playlist_not_supported('baomihua') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/bigthink.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import json 7 | 8 | class Bigthink(VideoExtractor): 9 | name = "Bigthink" 10 | 11 | stream_types = [ #this is just a sample. Will make it in prepare() 12 | # {'id': '1080'}, 13 | # {'id': '720'}, 14 | # {'id': '360'}, 15 | # {'id': '288'}, 16 | # {'id': '190'}, 17 | # {'id': '180'}, 18 | 19 | ] 20 | 21 | @staticmethod 22 | def get_streams_by_id(account_number, video_id): 23 | """ 24 | int, int->list 25 | 26 | Get the height of the videos. 27 | 28 | Since brightcove is using 3 kinds of links: rtmp, http and https, 29 | we will be using the HTTPS one to make it secure. 30 | 31 | If somehow akamaihd.net is blocked by the Great Fucking Wall, 32 | change the "startswith https" to http. 33 | """ 34 | endpoint = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_number}/videos/{video_id}'.format(account_number = account_number, video_id = video_id) 35 | fake_header_id = fake_headers 36 | #is this somehow related to the time? Magic.... 
37 | fake_header_id['Accept'] ='application/json;pk=BCpkADawqM1cc6wmJQC2tvoXZt4mrB7bFfi6zGt9QnOzprPZcGLE9OMGJwspQwKfuFYuCjAAJ53JdjI8zGFx1ll4rxhYJ255AXH1BQ10rnm34weknpfG-sippyQ' 38 | 39 | html = get_content(endpoint, headers= fake_header_id) 40 | html_json = json.loads(html) 41 | 42 | link_list = [] 43 | 44 | for i in html_json['sources']: 45 | if 'src' in i: #to avoid KeyError 46 | if i['src'].startswith('https'): 47 | link_list.append((str(i['height']), i['src'])) 48 | 49 | return link_list 50 | 51 | def prepare(self, **kwargs): 52 | 53 | html = get_content(self.url) 54 | 55 | self.title = match1(html, r'None 19 | 20 | Keyword arguments: 21 | self: self 22 | vid: The video ID for BokeCC cloud, something like 23 | FE3BB999594978049C33DC5901307461 24 | 25 | Calls the prepare() to download the video. 26 | 27 | If no title is provided, this method shall try to find a proper title 28 | with the information providin within the 29 | returned content of the API.""" 30 | 31 | assert vid 32 | 33 | self.prepare(vid = vid, title = title, **kwargs) 34 | 35 | self.extract(**kwargs) 36 | 37 | self.download(output_dir = output_dir, 38 | merge = merge, 39 | info_only = info_only, **kwargs) 40 | 41 | def prepare(self, vid = '', title = None, **kwargs): 42 | assert vid 43 | 44 | api_url = self.API_ENDPOINT + \ 45 | 'servlet/playinfo?vid={vid}&m=0'.format(vid = vid) #return XML 46 | 47 | html = get_content(api_url) 48 | self.tree = ET.ElementTree(ET.fromstring(html)) 49 | 50 | if self.tree.find('result').text != '1': 51 | log.wtf('API result says failed!') 52 | raise 53 | 54 | if title is None: 55 | self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')]) 56 | else: 57 | self.title = title 58 | 59 | if not title: 60 | self.title = vid 61 | 62 | for i in self.tree.iterfind('video/quality'): 63 | quality = i.attrib ['value'] 64 | url = i[0].attrib['playurl'] 65 | self.stream_types.append({'id': quality, 66 | 'video_profile': i.attrib ['desp']}) 67 | self.streams[quality] = {'url': url, 68 | 'video_profile': i.attrib ['desp']} 69 | self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] 70 | 71 | 72 | def extract(self, **kwargs): 73 | for i in self.streams: 74 | s = self.streams[i] 75 | _, s['container'], s['size'] = url_info(s['url']) 76 | s['src'] = [s['url']] 77 | if 'stream_id' in kwargs and kwargs['stream_id']: 78 | # Extract the stream 79 | stream_id = kwargs['stream_id'] 80 | 81 | if stream_id not in self.streams: 82 | log.e('[Error] Invalid video format.') 83 | log.e('Run \'-i\' command with no specific video format to view all available formats.') 84 | exit(2) 85 | else: 86 | # Extract stream with the best quality 87 | stream_id = self.streams_sorted[0]['id'] 88 | _, s['container'], s['size'] = url_info(s['url']) 89 | s['src'] = [s['url']] 90 | 91 | site = BokeCC() 92 | 93 | # I don't know how to call the player directly so I just put it here 94 | # just in case anyone touchs it -- Beining@Aug.24.2016 95 | #download = site.download_by_url 96 | #download_playlist = site.download_by_url 97 | 98 | bokecc_download_by_id = site.download_by_id 99 | -------------------------------------------------------------------------------- /src/you_get/extractors/cbs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['cbs_download'] 4 | 5 | from ..common import * 6 | 7 
| from .theplatform import theplatform_download_by_pid 8 | 9 | def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | """Downloads CBS videos by URL. 11 | """ 12 | 13 | html = get_content(url) 14 | pid = match1(html, r'video\.settings\.pid\s*=\s*\'([^\']+)\'') 15 | title = match1(html, r'video\.settings\.title\s*=\s*\"([^\"]+)\"') 16 | 17 | theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only) 18 | 19 | site_info = "CBS.com" 20 | download = cbs_download 21 | download_playlist = playlist_not_supported('cbs') 22 | -------------------------------------------------------------------------------- /src/you_get/extractors/ckplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding:utf-8 3 | # Author: Beining -- 4 | # Purpose: A general extractor for CKPlayer 5 | # Created: 03/15/2016 6 | 7 | __all__ = ['ckplayer_download'] 8 | 9 | from xml.etree import ElementTree as ET 10 | from copy import copy 11 | from ..common import * 12 | #---------------------------------------------------------------------- 13 | def ckplayer_get_info_by_xml(ckinfo): 14 | """str->dict 15 | Information for CKPlayer API content.""" 16 | e = ET.XML(ckinfo) 17 | video_dict = {'title': '', 18 | #'duration': 0, 19 | 'links': [], 20 | 'size': 0, 21 | 'flashvars': '',} 22 | dictified = dictify(e)['ckplayer'] 23 | if 'info' in dictified: 24 | if '_text' in dictified['info'][0]['title'][0]: #title 25 | video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip() 26 | 27 | #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip(): #duration 28 | #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip() 29 | 30 | if '_text' in dictified['video'][0]['size'][0]: #size exists for 1 piece 31 | video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']]) 32 | 33 | if '_text' in dictified['video'][0]['file'][0]: #link exist 34 | video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']] 35 | 36 | if '_text' in dictified['flashvars'][0]: 37 | video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip() 38 | 39 | return video_dict 40 | 41 | #---------------------------------------------------------------------- 42 | #helper 43 | #https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary-in-python 44 | def dictify(r,root=True): 45 | if root: 46 | return {r.tag : dictify(r, False)} 47 | d=copy(r.attrib) 48 | if r.text: 49 | d["_text"]=r.text 50 | for x in r.findall("./*"): 51 | if x.tag not in d: 52 | d[x.tag]=[] 53 | d[x.tag].append(dictify(x,False)) 54 | return d 55 | 56 | #---------------------------------------------------------------------- 57 | def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs): 58 | #Info XML 59 | video_info = ckplayer_get_info_by_xml(ckinfo) 60 | 61 | try: 62 | title = kwargs['title'] 63 | except: 64 | title = '' 65 | type_ = '' 66 | size = 0 67 | 68 | if len(video_info['links']) > 0: #has link 69 | type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext 70 | 71 | if 'size' in video_info: 72 | size = int(video_info['size']) 73 | else: 74 | for i in video_info['links'][1:]: #save 1st one 75 | size += url_info(i)[2] 76 | 77 | print_info(site_info, title, type_, size) 78 | if not info_only: 79 | download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, 
merge=merge) 80 | 81 | #---------------------------------------------------------------------- 82 | def ckplayer_download(url, output_dir = '.', merge = False, info_only = False, is_xml = True, **kwargs): 83 | if is_xml: #URL is XML URL 84 | try: 85 | title = kwargs['title'] 86 | except: 87 | title = '' 88 | try: 89 | headers = kwargs['headers'] #headers provided 90 | ckinfo = get_content(url, headers = headers) 91 | except NameError: 92 | ckinfo = get_content(url) 93 | 94 | ckplayer_download_by_xml(ckinfo, output_dir, merge, 95 | info_only, title = title) 96 | 97 | site_info = "CKPlayer General" 98 | download = ckplayer_download 99 | download_playlist = playlist_not_supported('ckplayer') 100 | -------------------------------------------------------------------------------- /src/you_get/extractors/cntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import re 5 | 6 | from ..common import get_content, r1, match1, playlist_not_supported 7 | from ..extractor import VideoExtractor 8 | 9 | __all__ = ['cntv_download', 'cntv_download_by_id'] 10 | 11 | 12 | class CNTV(VideoExtractor): 13 | name = 'CNTV.com' 14 | stream_types = [ 15 | {'id': '1', 'video_profile': '1280x720_2000kb/s', 'map_to': 'chapters4'}, 16 | {'id': '2', 'video_profile': '1280x720_1200kb/s', 'map_to': 'chapters3'}, 17 | {'id': '3', 'video_profile': '640x360_850kb/s', 'map_to': 'chapters2'}, 18 | {'id': '4', 'video_profile': '480x270_450kb/s', 'map_to': 'chapters'}, 19 | {'id': '5', 'video_profile': '320x180_200kb/s', 'map_to': 'lowChapters'}, 20 | ] 21 | 22 | ep = 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={}' 23 | 24 | def __init__(self): 25 | super().__init__() 26 | self.api_data = None 27 | 28 | def prepare(self, **kwargs): 29 | self.api_data = json.loads(get_content(self.__class__.ep.format(self.vid))) 30 | self.title = self.api_data['title'] 31 | for s in self.api_data['video']: 32 | for st in self.__class__.stream_types: 33 | if st['map_to'] == s: 34 | urls = self.api_data['video'][s] 35 | src = [u['url'] for u in urls] 36 | stream_data = dict(src=src, size=0, container='mp4', video_profile=st['video_profile']) 37 | self.streams[st['id']] = stream_data 38 | 39 | 40 | def cntv_download_by_id(rid, **kwargs): 41 | CNTV().download_by_vid(rid, **kwargs) 42 | 43 | 44 | def cntv_download(url, **kwargs): 45 | if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url): 46 | rid = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)') 47 | elif re.match(r'http(s)?://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url): 48 | rid = r1(r'var guid = "(\w+)"', get_content(url)) 49 | elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \ 50 | re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ 51 | re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \ 52 | re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \ 53 | re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): 54 | page = get_content(url) 55 | rid = r1(r'videoCenterId","(\w+)"', page) 56 | if rid is None: 57 | guid = re.search(r'guid\s*=\s*"([0-9a-z]+)"', page).group(1) 58 | rid = guid 59 | elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): 60 | rid = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) 61 | else: 62 | raise NotImplementedError(url) 63 | 64 | CNTV().download_by_vid(rid, **kwargs) 65 | 66 | site_info = "CNTV.com" 67 | download = cntv_download 68 | download_playlist = 
playlist_not_supported('cntv') 69 | -------------------------------------------------------------------------------- /src/you_get/extractors/coub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['coub_download'] 4 | 5 | from ..common import * 6 | from ..processor import ffmpeg 7 | from ..util.fs import legitimize 8 | 9 | 10 | def coub_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 11 | html = get_content(url) 12 | 13 | try: 14 | json_data = get_coub_data(html) 15 | title, video_url, audio_url = get_title_and_urls(json_data) 16 | video_file_name, video_file_path = get_file_path(merge, output_dir, title, video_url) 17 | audio_file_name, audio_file_path = get_file_path(merge, output_dir, title, audio_url) 18 | download_url(audio_url, merge, output_dir, title, info_only) 19 | download_url(video_url, merge, output_dir, title, info_only) 20 | if not info_only: 21 | try: 22 | fix_coub_video_file(video_file_path) 23 | audio_duration = float(ffmpeg.ffprobe_get_media_duration(audio_file_path)) 24 | video_duration = float(ffmpeg.ffprobe_get_media_duration(video_file_path)) 25 | loop_file_path = get_loop_file_path(title, output_dir) 26 | single_file_path = audio_file_path 27 | if audio_duration > video_duration: 28 | write_loop_file(round(audio_duration / video_duration), loop_file_path, video_file_name) 29 | else: 30 | single_file_path = audio_file_path 31 | write_loop_file(round(video_duration / audio_duration), loop_file_path, audio_file_name) 32 | 33 | ffmpeg.ffmpeg_concat_audio_and_video([loop_file_path, single_file_path], title + "_full", "mp4") 34 | cleanup_files([video_file_path, audio_file_path, loop_file_path]) 35 | except EnvironmentError as err: 36 | print("Error preparing full coub video. {}".format(err)) 37 | except Exception as err: 38 | print("Error while downloading files. 
{}".format(err)) 39 | 40 | 41 | def write_loop_file(records_number, loop_file_path, file_name): 42 | with open(loop_file_path, 'a') as file: 43 | for i in range(records_number): 44 | file.write("file '{}'\n".format(file_name)) 45 | 46 | 47 | def download_url(url, merge, output_dir, title, info_only): 48 | mime, ext, size = url_info(url) 49 | print_info(site_info, title, mime, size) 50 | if not info_only: 51 | download_urls([url], title, ext, size, output_dir, merge=merge) 52 | 53 | 54 | def fix_coub_video_file(file_path): 55 | with open(file_path, 'r+b') as file: 56 | file.seek(0) 57 | file.write(bytes(2)) 58 | 59 | 60 | def get_title_and_urls(json_data): 61 | title = legitimize(re.sub(r'[\s*]', "_", json_data['title'])) 62 | video_info = json_data['file_versions']['html5']['video'] 63 | if 'high' not in video_info: 64 | if 'med' not in video_info: 65 | video_url = video_info['low']['url'] 66 | else: 67 | video_url = video_info['med']['url'] 68 | else: 69 | video_url = video_info['high']['url'] 70 | audio_info = json_data['file_versions']['html5']['audio'] 71 | if 'high' not in audio_info: 72 | if 'med' not in audio_info: 73 | audio_url = audio_info['low']['url'] 74 | else: 75 | audio_url = audio_info['med']['url'] 76 | else: 77 | audio_url = audio_info['high']['url'] 78 | return title, video_url, audio_url 79 | 80 | 81 | def get_coub_data(html): 82 | coub_data = r1(r'))', html) 83 | json_data = json.loads(coub_data) 84 | return json_data 85 | 86 | 87 | def get_file_path(merge, output_dir, title, url): 88 | mime, ext, size = url_info(url) 89 | file_name = get_output_filename([], title, ext, output_dir, merge) 90 | file_path = os.path.join(output_dir, file_name) 91 | return file_name, file_path 92 | 93 | 94 | def get_loop_file_path(title, output_dir): 95 | return os.path.join(output_dir, get_output_filename([], title, "txt", None, False)) 96 | 97 | 98 | def cleanup_files(files): 99 | for file in files: 100 | os.remove(file) 101 | 102 | 103 | site_info = "coub.com" 104 | download = coub_download 105 | download_playlist = playlist_not_supported('coub') 106 | -------------------------------------------------------------------------------- /src/you_get/extractors/dailymotion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['dailymotion_download'] 4 | 5 | from ..common import * 6 | import urllib.parse 7 | 8 | def rebuilt_url(url): 9 | path = urllib.parse.urlparse(url).path 10 | aid = path.split('/')[-1].split('_')[0] 11 | return 'http://www.dailymotion.com/embed/video/{}?autoplay=1'.format(aid) 12 | 13 | def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 14 | """Downloads Dailymotion videos by URL. 
15 | """ 16 | 17 | html = get_content(rebuilt_url(url)) 18 | info = json.loads(match1(html, r'qualities":({.+?}),"')) 19 | title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ 20 | match1(html, r'"title"\s*:\s*"([^"]+)"') 21 | title = unicodize(title) 22 | 23 | for quality in ['1080','720','480','380','240','144','auto']: 24 | try: 25 | real_url = info[quality][1]["url"] 26 | if real_url: 27 | break 28 | except KeyError: 29 | pass 30 | 31 | mime, ext, size = url_info(real_url) 32 | 33 | print_info(site_info, title, mime, size) 34 | if not info_only: 35 | download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge) 36 | 37 | site_info = "Dailymotion.com" 38 | download = dailymotion_download 39 | download_playlist = playlist_not_supported('dailymotion') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/douban.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['douban_download'] 4 | 5 | import urllib.request, urllib.parse 6 | from ..common import * 7 | 8 | def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 9 | html = get_html(url) 10 | 11 | if re.match(r'https?://movie', url): 12 | title = match1(html, 'name="description" content="([^"]+)') 13 | tid = match1(url, r'trailer/(\d+)') 14 | real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid 15 | type, ext, size = url_info(real_url) 16 | 17 | print_info(site_info, title, type, size) 18 | if not info_only: 19 | download_urls([real_url], title, ext, size, output_dir, merge = merge) 20 | 21 | elif 'subject' in url: 22 | titles = re.findall(r'data-title="([^"]*)">', html) 23 | song_id = re.findall(r'
  • (.+?)' 20 | 21 | hit = re.search(patt, url) 22 | if hit is None: 23 | log.wtf('Unknown url pattern') 24 | vid = hit.group(1) 25 | 26 | page = get_content(url, headers=headers) 27 | hit = re.search(title_patt, page) 28 | if hit is None: 29 | title = vid 30 | else: 31 | title = hit.group(1) 32 | 33 | meta = json.loads(get_content(ep + vid)) 34 | if meta['error'] != 0: 35 | log.wtf('Error from API server') 36 | m3u8_url = meta['data']['video_url'] 37 | print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url) 38 | if not info_only: 39 | urls = general_m3u8_extractor(m3u8_url) 40 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 41 | 42 | 43 | def douyutv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 44 | if 'v.douyu.com/show/' in url: 45 | douyutv_video_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 46 | return 47 | 48 | url = re.sub(r'.*douyu.com','https://m.douyu.com/room', url) 49 | html = get_content(url, headers) 50 | room_id_patt = r'"rid"\s*:\s*(\d+),' 51 | room_id = match1(html, room_id_patt) 52 | if room_id == "0": 53 | room_id = url[url.rfind('/') + 1:] 54 | 55 | api_url = "http://www.douyutv.com/api/v1/" 56 | args = "room/%s?aid=wp&client_sys=wp&time=%d" % (room_id, int(time.time())) 57 | auth_md5 = (args + "zNzMV1y4EMxOHS6I5WKm").encode("utf-8") 58 | auth_str = hashlib.md5(auth_md5).hexdigest() 59 | json_request_url = "%s%s&auth=%s" % (api_url, args, auth_str) 60 | 61 | content = get_content(json_request_url, headers) 62 | json_content = json.loads(content) 63 | data = json_content['data'] 64 | server_status = json_content.get('error', 0) 65 | if server_status != 0: 66 | raise ValueError("Server returned error:%s" % server_status) 67 | 68 | title = data.get('room_name') 69 | show_status = data.get('show_status') 70 | if show_status != "1": 71 | raise ValueError("The live stream is not online! 
(Errno:%s)" % server_status) 72 | 73 | real_url = data.get('rtmp_url') + '/' + data.get('rtmp_live') 74 | 75 | print_info(site_info, title, 'flv', float('inf')) 76 | if not info_only: 77 | download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) 78 | 79 | 80 | site_info = "douyu.com" 81 | download = douyutv_download 82 | download_playlist = playlist_not_supported('douyu') 83 | -------------------------------------------------------------------------------- /src/you_get/extractors/ehow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ehow_download'] 4 | 5 | from ..common import * 6 | 7 | def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | 9 | assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported" 10 | 11 | html = get_html(url) 12 | contentid = r1(r'', html) 13 | vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) 14 | assert vid 15 | 16 | xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) 17 | 18 | from xml.dom.minidom import parseString 19 | doc = parseString(xml) 20 | tab = doc.getElementsByTagName('related')[0].firstChild 21 | 22 | for video in tab.childNodes: 23 | if re.search(contentid, video.attributes['link'].value): 24 | url = video.attributes['flv'].value 25 | break 26 | 27 | title = video.attributes['title'].value 28 | assert title 29 | 30 | type, ext, size = url_info(url) 31 | print_info(site_info, title, type, size) 32 | 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir, merge = merge) 35 | 36 | site_info = "ehow.com" 37 | download = ehow_download 38 | download_playlist = playlist_not_supported('ehow') 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/facebook.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['facebook_download'] 4 | 5 | from ..common import * 6 | 7 | def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | url = re.sub(r'//.*?facebook.com','//facebook.com',url) 9 | html = get_html(url) 10 | 11 | title = r1(r'(.+)', html) 12 | 13 | if title is None: 14 | title = url 15 | 16 | sd_urls = list(set([ 17 | unicodize(str.replace(i, '\\/', '/')) 18 | for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html) 19 | ])) 20 | hd_urls = list(set([ 21 | unicodize(str.replace(i, '\\/', '/')) 22 | for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html) 23 | ])) 24 | urls = hd_urls if hd_urls else sd_urls 25 | 26 | type, ext, size = url_info(urls[0], True) 27 | size = urls_size(urls) 28 | 29 | print_info(site_info, title, type, size) 30 | if not info_only: 31 | download_urls(urls, title, ext, size, output_dir, merge=False) 32 | 33 | site_info = "Facebook.com" 34 | download = facebook_download 35 | download_playlist = playlist_not_supported('facebook') 36 | -------------------------------------------------------------------------------- /src/you_get/extractors/fc2video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['fc2video_download'] 4 | 5 | from ..common import * 6 | from hashlib import md5 7 | from urllib.parse import urlparse 8 | 9 | #---------------------------------------------------------------------- 10 | def makeMimi(upid): 11 | """From 
http://cdn37.atwikiimg.com/sitescript/pub/dksitescript/FC2.site.js 12 | Also com.hps.util.fc2.FC2EncrptUtil.makeMimiLocal 13 | L110""" 14 | strSeed = "gGddgPfeaf_gzyr" 15 | prehash = upid + "_" + strSeed 16 | return md5(prehash.encode('utf-8')).hexdigest() 17 | 18 | #---------------------------------------------------------------------- 19 | def fc2video_download_by_upid(upid, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | """""" 21 | fake_headers = { 22 | 'DNT': '1', 23 | 'Accept-Encoding': 'gzip, deflate, sdch', 24 | 'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2', 25 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.58 Safari/537.36', 26 | 'Accept': '*/*', 27 | 'X-Requested-With': 'ShockwaveFlash/19.0.0.245', 28 | 'Connection': 'keep-alive', 29 | } 30 | api_base = 'http://video.fc2.com/ginfo.php?upid={upid}&mimi={mimi}'.format(upid = upid, mimi = makeMimi(upid)) 31 | html = get_content(api_base, headers=fake_headers) 32 | 33 | video_url = match1(html, r'filepath=(.+)&sec') 34 | video_url = video_url.replace('&mid', '?mid') 35 | 36 | title = match1(html, r'&title=([^&]+)') 37 | 38 | type, ext, size = url_info(video_url, headers=fake_headers) 39 | 40 | print_info(site_info, title, type, size) 41 | if not info_only: 42 | download_urls([video_url], title, ext, size, output_dir, merge=merge, headers = fake_headers) 43 | 44 | #---------------------------------------------------------------------- 45 | def fc2video_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 46 | """wrapper""" 47 | #'http://video.fc2.com/en/content/20151021bTVKnbEw' 48 | #'http://xiaojiadianvideo.asia/content/20151021bTVKnbEw' 49 | #'http://video.fc2.com/ja/content/20151021bTVKnbEw' 50 | #'http://video.fc2.com/tw/content/20151021bTVKnbEw' 51 | hostname = urlparse(url).hostname 52 | if not ('fc2.com' in hostname or 'xiaojiadianvideo.asia' in hostname): 53 | return False 54 | upid = match1(url, r'.+/content/(\w+)') 55 | 56 | fc2video_download_by_upid(upid, output_dir, merge, info_only) 57 | 58 | site_info = "FC2Video" 59 | download = fc2video_download 60 | download_playlist = playlist_not_supported('fc2video') 61 | -------------------------------------------------------------------------------- /src/you_get/extractors/freesound.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['freesound_download'] 4 | 5 | from ..common import * 6 | 7 | def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 8 | page = get_html(url) 9 | 10 | title = r1(r'', html) 13 | ])) 14 | 15 | title = r1(r'', html) 16 | 17 | if title is None: 18 | title = url[0] 19 | 20 | type, ext, size = url_info(url[0], True) 21 | size = urls_size(url) 22 | 23 | type = "video/mp4" 24 | ext = "mp4" 25 | 26 | print_info(site_info, title, type, size) 27 | if not info_only: 28 | download_urls(url, title, ext, size, output_dir, merge=False) 29 | 30 | site_info = "Giphy.com" 31 | download = giphy_download 32 | download_playlist = playlist_not_supported('giphy') 33 | -------------------------------------------------------------------------------- /src/you_get/extractors/heavymusic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['heavymusic_download'] 4 | 5 | from ..common import * 6 | 7 | def heavymusic_download(url, output_dir='.', merge=True, 
info_only=False, **kwargs): 8 | html = get_html(url) 9 | tracks = re.findall(r'href="(online2\.php[^"]+)"', html) 10 | for track in tracks: 11 | band = r1(r'band=([^&]*)', track) 12 | album = r1(r'album=([^&]*)', track) 13 | title = r1(r'track=([^&]*)', track) 14 | file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) 15 | _, _, size = url_info(file_url) 16 | 17 | print_info(site_info, title, 'mp3', size) 18 | if not info_only: 19 | download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) 20 | 21 | site_info = "heavy-music.ru" 22 | download = heavymusic_download 23 | download_playlist = heavymusic_download 24 | -------------------------------------------------------------------------------- /src/you_get/extractors/huomaotv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['huomaotv_download'] 4 | 5 | from ..common import * 6 | 7 | 8 | def get_mobile_room_url(room_id): 9 | return 'http://www.huomao.com/mobile/mob_live/%s' % room_id 10 | 11 | 12 | def get_m3u8_url(stream_id): 13 | return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id 14 | 15 | 16 | def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 17 | room_id_pattern = r'huomao.com/(\d+)' 18 | room_id = match1(url, room_id_pattern) 19 | html = get_content(get_mobile_room_url(room_id)) 20 | 21 | stream_id_pattern = r'id="html_stream" value="(\w+)"' 22 | stream_id = match1(html, stream_id_pattern) 23 | 24 | m3u8_url = get_m3u8_url(stream_id) 25 | 26 | title = match1(html, r'([^<]{1,9999})') 27 | 28 | print_info(site_info, title, 'm3u8', float('inf')) 29 | 30 | if not info_only: 31 | download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) 32 | 33 | 34 | site_info = 'huomao.com' 35 | download = huomaotv_download 36 | download_playlist = playlist_not_supported('huomao') 37 | -------------------------------------------------------------------------------- /src/you_get/extractors/ifeng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ifeng_download', 'ifeng_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 8 | assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id 9 | url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id) 10 | xml = get_html(url, 'utf-8') 11 | title = r1(r'Name="([^"]+)"', xml) 12 | title = unescape_html(title) 13 | url = r1(r'VideoPlayUrl="([^"]+)"', xml) 14 | from random import randint 15 | r = randint(10, 19) 16 | url = url.replace('http://wideo.ifeng.com/', 'http://ips.ifeng.com/wideo.ifeng.com/') 17 | type, ext, size = url_info(url) 18 | 19 | print_info(site_info, title, ext, size) 20 | if not info_only: 21 | download_urls([url], title, ext, size, output_dir, merge = merge) 22 | 23 | def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 24 | # old pattern /uuid.shtml 25 | # now it could be #uuid 26 | id = r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', url) 27 | if id: 28 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 29 | 30 | html = get_content(url) 31 | uuid_pattern = 
r'"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"' 32 | id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', html) 33 | if id is None: 34 | video_pattern = r'"vid"\s*:\s*' + uuid_pattern 35 | id = match1(html, video_pattern) 36 | assert id, "can't find video info" 37 | return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) 38 | 39 | site_info = "ifeng.com" 40 | download = ifeng_download 41 | download_playlist = playlist_not_supported('ifeng') 42 | -------------------------------------------------------------------------------- /src/you_get/extractors/imgur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | from .universal import * 6 | 7 | class Imgur(VideoExtractor): 8 | name = "Imgur" 9 | 10 | stream_types = [ 11 | {'id': 'original'}, 12 | {'id': 'thumbnail'}, 13 | ] 14 | 15 | def prepare(self, **kwargs): 16 | self.ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/123.0.2420.97' 17 | 18 | if re.search(r'imgur\.com/a/', self.url): 19 | # album 20 | content = get_content(self.url, headers=fake_headers) 21 | album = match1(content, r'album\s*:\s*({.*}),') or \ 22 | match1(content, r'image\s*:\s*({.*}),') 23 | album = json.loads(album) 24 | count = album['album_images']['count'] 25 | images = album['album_images']['images'] 26 | ext = images[0]['ext'] 27 | self.streams = { 28 | 'original': { 29 | 'src': ['http://i.imgur.com/%s%s' % (i['hash'], ext) 30 | for i in images], 31 | 'size': sum([i['size'] for i in images]), 32 | 'container': ext[1:] 33 | }, 34 | 'thumbnail': { 35 | 'src': ['http://i.imgur.com/%ss%s' % (i['hash'], '.jpg') 36 | for i in images], 37 | 'container': 'jpg' 38 | } 39 | } 40 | self.title = album['title'] 41 | 42 | elif re.search(r'i\.imgur\.com/', self.url): 43 | # direct image 44 | _, container, size = url_info(self.url, faker=True) 45 | self.streams = { 46 | 'original': { 47 | 'src': [self.url], 48 | 'size': size, 49 | 'container': container 50 | } 51 | } 52 | self.title = r1(r'i\.imgur\.com/([^./]*)', self.url) 53 | 54 | else: 55 | # gallery image 56 | content = get_content(self.url, headers=fake_headers) 57 | url = match1(content, r'meta property="og:video"[^>]+(https?://i.imgur.com/[^"?]+)') or \ 58 | match1(content, r'meta property="og:image"[^>]+(https?://i.imgur.com/[^"?]+)') 59 | _, container, size = url_info(url, headers={'User-Agent': fake_headers['User-Agent']}) 60 | self.streams = { 61 | 'original': { 62 | 'src': [url], 63 | 'size': size, 64 | 'container': container 65 | } 66 | } 67 | self.title = r1(r'i\.imgur\.com/([^./]*)', url) 68 | 69 | def extract(self, **kwargs): 70 | if 'stream_id' in kwargs and kwargs['stream_id']: 71 | i = kwargs['stream_id'] 72 | if 'size' not in self.streams[i]: 73 | self.streams[i]['size'] = urls_size(self.streams[i]['src']) 74 | 75 | site = Imgur() 76 | download = site.download_by_url 77 | download_playlist = site.download_by_url 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/infoq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | import ssl 7 | 8 | class Infoq(VideoExtractor): 9 | name = "InfoQ" 10 | 11 | stream_types = [ 12 | 
{'id': 'video'}, 13 | {'id': 'audio'}, 14 | {'id': 'slides'} 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | content = get_content(self.url) 19 | self.title = match1(content, r'<title>([^<]+)</title>') 20 | s = match1(content, r'P\.s\s*=\s*\'([^\']+)\'') 21 | scp = match1(content, r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'') 22 | scs = match1(content, r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'') 23 | sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'') 24 | 25 | mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"') 26 | if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 27 | 28 | pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"') 29 | if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf 30 | 31 | # cookie handler 32 | ssl_context = request.HTTPSHandler( 33 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 34 | cookie_handler = request.HTTPCookieProcessor() 35 | opener = request.build_opener(ssl_context, cookie_handler) 36 | opener.addheaders = [ 37 | ('Referer', self.url), 38 | ('Cookie', 39 | 'CloudFront-Policy=%s;CloudFront-Signature=%s;CloudFront-Key-Pair-Id=%s' % (scp, scs, sck)) 40 | ] 41 | request.install_opener(opener) 42 | 43 | if s: self.streams['video'] = {'url': s } 44 | if mp3: self.streams['audio'] = { 'url': mp3 } 45 | if pdf: self.streams['slides'] = { 'url': pdf } 46 | 47 | def extract(self, **kwargs): 48 | for i in self.streams: 49 | s = self.streams[i] 50 | _, s['container'], s['size'] = url_info(s['url']) 51 | s['src'] = [s['url']] 52 | 53 | site = Infoq() 54 | download = site.download_by_url 55 | download_playlist = site.download_by_url 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/instagram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['instagram_download'] 4 | 5 | from ..common import * 6 | 7 | def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87', 10 | 'sec-fetch-mode': 'navigate' # important 11 | } 12 | 13 | url = r1(r'([^?]*)', url) 14 | cont = get_content(url, headers=headers) 15 | 16 | vid = r1(r'instagram.com/\w+/([^/]+)', url) 17 | description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \ 18 | r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies 19 | title = "{} [{}]".format(description.replace("\n", " "), vid) 20 | 21 | appId = r1(r'"appId":"(\d+)"', cont) 22 | media_id = r1(r'"media_id":"(\d+)"', cont) 23 | logging.debug('appId: %s' % appId) 24 | logging.debug('media_id: %s' % media_id) 25 | 26 | api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id 27 | try: 28 | api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) 29 | post = json.loads(api_cont) 30 | except: 31 | log.wtf('[Error] Please specify a cookie file.') 32 | 33 | for item in post['items']: 34 | code = item['code'] 35 | carousel_media = item.get('carousel_media') or [item] 36 | for i, media in enumerate(carousel_media): 37 | title = '%s [%s]' % (code, i) 38 | image_url = media['image_versions2']['candidates'][0]['url'] 39 | ext = image_url.split('?')[0].split('.')[-1] 40 | size = int(get_head(image_url)['Content-Length']) 41 | 42 | print_info(site_info, title, ext, size) 43 | if not info_only: 44 | download_urls(urls=[image_url], 45 | title=title, 46 | ext=ext, 47 | total_size=size, 48 | output_dir=output_dir) 49 | 50 | # 
download videos (if any) 51 | if 'video_versions' in media: 52 | video_url = media['video_versions'][0]['url'] 53 | ext = video_url.split('?')[0].split('.')[-1] 54 | size = int(get_head(video_url)['Content-Length']) 55 | 56 | print_info(site_info, title, ext, size) 57 | if not info_only: 58 | download_urls(urls=[video_url], 59 | title=title, 60 | ext=ext, 61 | total_size=size, 62 | output_dir=output_dir) 63 | 64 | site_info = "Instagram.com" 65 | download = instagram_download 66 | download_playlist = playlist_not_supported('instagram') 67 | -------------------------------------------------------------------------------- /src/you_get/extractors/interest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from json import loads 5 | 6 | def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 7 | #http://ch.interest.me/zhtv/VOD/View/114789 8 | #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 9 | html = get_content(url) 10 | #get title 11 | title = match1(html, r'(.*)', html) 27 | api_url = video_url + '/api/video/' + video_hash 28 | content = get_content(api_url, headers=headers) 29 | data = json.loads(content) 30 | if len(data)<1 : 31 | print('Maybe is Private Video?'+'['+title+']') 32 | return True; 33 | down_urls = 'https:' + data[0]['uri'] 34 | type, ext, size = url_info(down_urls, headers=headers) 35 | print_info(site_info, title+data[0]['resolution'], type, size) 36 | 37 | if not info_only: 38 | download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers) 39 | 40 | def download_playlist_by_url( url, **kwargs): 41 | video_page = get_html(url) 42 | url_first=match1(url, r"(http[s]?://[^/]+)") 43 | videos = set(re.findall(r'0): 45 | for video in videos: 46 | iwara_download(url_first+video, **kwargs) 47 | else: 48 | maybe_print('this page not found any videos') 49 | site_info = "Iwara" 50 | download = iwara_download 51 | download_playlist = download_playlist_by_url 52 | -------------------------------------------------------------------------------- /src/you_get/extractors/joy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['joy_download'] 4 | 5 | from ..common import * 6 | 7 | def video_info(channel_id, program_id, volumn_id): 8 | url = 'http://msx.app.joy.cn/service.php' 9 | if program_id: 10 | url += '?action=vodmsxv6' 11 | url += '&channelid=%s' % channel_id 12 | url += '&programid=%s' % program_id 13 | url += '&volumnid=%s' % volumn_id 14 | else: 15 | url += '?action=msxv6' 16 | url += '&videoid=%s' % volumn_id 17 | 18 | xml = get_html(url) 19 | 20 | name = r1(r'(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?', xml) 21 | urls = re.findall(r']*>(?:)?', xml) 22 | hostpath = r1(r']*>(?:)?', xml) 23 | 24 | return name, urls, hostpath 25 | 26 | def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 27 | channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) 28 | program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) 29 | volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) 30 | 31 | title, urls, hostpath = video_info(channel_id, program_id, volumn_id) 32 | urls = [hostpath + url for url in urls] 33 | 34 | size = 0 35 | for url in urls: 36 | _, ext, temp = url_info(url) 37 | size += temp 38 | 39 | print_info(site_info, title, ext, size) 40 | if not info_only: 41 | download_urls(urls, title, ext, size, output_dir = 
output_dir, merge = merge) 42 | 43 | site_info = "Joy.cn" 44 | download = joy_download 45 | download_playlist = playlist_not_supported('joy') 46 | -------------------------------------------------------------------------------- /src/you_get/extractors/kakao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from .universal import * 5 | 6 | __all__ = ['kakao_download'] 7 | 8 | 9 | def kakao_download(url, output_dir='.', info_only=False, **kwargs): 10 | json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}' 11 | 12 | # in this implementation playlist not supported so use url_without_playlist 13 | # if want to support playlist need to change that 14 | if re.search('playlistId', url): 15 | url = re.search(r"(.+)\?.+?", url).group(1) 16 | 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"", page).group(1) 20 | title = re.search(r"", page).group(1) 21 | 22 | meta_str = get_content(json_request_url.format(vid)) 23 | meta_json = json.loads(meta_str) 24 | 25 | standard_preset = meta_json['output_list']['standard_preset'] 26 | output_videos = meta_json['output_list']['output_list'] 27 | size = '' 28 | if meta_json['svcname'] == 'smr_pip': 29 | for v in output_videos: 30 | if v['preset'] == 'mp4_PIP_SMR_480P': 31 | size = int(v['filesize']) 32 | break 33 | else: 34 | for v in output_videos: 35 | if v['preset'] == standard_preset: 36 | size = int(v['filesize']) 37 | break 38 | 39 | video_url = meta_json['location']['url'] 40 | 41 | print_info(site_info, title, 'mp4', size) 42 | if not info_only: 43 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 44 | except: 45 | universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) 46 | 47 | 48 | site_info = "tv.kakao.com" 49 | download = kakao_download 50 | download_playlist = playlist_not_supported('kakao') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/khan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['khan_download'] 4 | 5 | from ..common import * 6 | from .youtube import YouTube 7 | 8 | def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_content(url) 10 | youtube_url = re.search('(.*?)", page).group(1) 23 | size = url_size(video_url) 24 | video_format = "flv"#video_url.split('.')[-1] 25 | print_info(site_info, title, video_format, size) 26 | if not info_only: 27 | download_urls([video_url], title, video_format, size, **kwargs) 28 | except:# extract image 29 | og_image_url = re.search(r"", page).group(1) 30 | image_url = og_image_url 31 | title = url.split('/')[-1] 32 | size = url_size(image_url) 33 | image_format = image_url.split('.')[-1] 34 | print_info(site_info, title, image_format, size) 35 | if not info_only: 36 | download_urls([image_url], title, image_format, size, **kwargs) 37 | 38 | site_info = "kuaishou.com" 39 | download = kuaishou_download_by_url 40 | download_playlist = playlist_not_supported('kuaishou') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/kugou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['kugou_download'] 4 | 5 | from ..common import * 6 | from json import loads 7 | from base64 import 
b64decode 8 | import re 9 | 10 | 11 | def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs): 12 | if url.lower().find("5sing") != -1: 13 | # for 5sing.kugou.com 14 | html = get_html(url) 15 | ticket = r1(r'"ticket":\s*"(.*)"', html) 16 | j = loads(str(b64decode(ticket), encoding="utf-8")) 17 | url = j['file'] 18 | title = j['songName'] 19 | songtype, ext, size = url_info(url) 20 | print_info(site_info, title, songtype, size) 21 | if not info_only: 22 | download_urls([url], title, ext, size, output_dir, merge=merge) 23 | elif url.lower().find("hash") != -1: 24 | return kugou_download_by_hash(url, output_dir, merge, info_only) 25 | else: 26 | # for the www.kugou.com/ 27 | return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) 28 | # raise NotImplementedError(url) 29 | 30 | 31 | def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): 32 | # sample 33 | # url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462 34 | hash_val = match1(url, r'hash=(\w+)') 35 | album_id = match1(url, r'album_id=(\d+)') 36 | if not album_id: 37 | album_id = 123 38 | html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id)) 39 | j = loads(html) 40 | url = j['data']['play_url'] 41 | title = j['data']['audio_name'] 42 | # some songs cann't play because of copyright protection 43 | if (url == ''): 44 | return 45 | songtype, ext, size = url_info(url) 46 | print_info(site_info, title, songtype, size) 47 | if not info_only: 48 | download_urls([url], title, ext, size, output_dir, merge=merge) 49 | 50 | 51 | def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): 52 | urls = [] 53 | 54 | # download music leaderboard 55 | # sample: http://www.kugou.com/yy/html/rank.html 56 | if url.lower().find('rank') != -1: 57 | html = get_html(url) 58 | pattern = re.compile('(.*)") 11 | #to get title 12 | #format =aac|mp3 ->to get aac format=mp3 ->to get mp3 13 | url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid) 14 | songtype, ext, size = url_info(url) 15 | print_info(site_info, title, songtype, size) 16 | if not info_only: 17 | download_urls([url], title, ext, size, output_dir) 18 | 19 | def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 20 | html=get_content(url) 21 | matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated 22 | for rid in matched: 23 | kuwo_download_by_rid(rid,output_dir,merge,info_only) 24 | 25 | 26 | 27 | def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if "www.kuwo.cn/yinyue" in url: 29 | rid=match1(url, r'yinyue/(\d+)') 30 | kuwo_download_by_rid(rid,output_dir, merge, info_only) 31 | else: 32 | kuwo_playlist_download(url,output_dir,merge,info_only) 33 | 34 | site_info = "kuwo.cn" 35 | download = kuwo_download 36 | # download_playlist = playlist_not_supported("kugou") 37 | # download_playlist=playlist_not_supported("kuwo") 38 | download_playlist=kuwo_playlist_download 39 | -------------------------------------------------------------------------------- /src/you_get/extractors/lizhi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['lizhi_download'] 4 | import json 5 | import datetime 6 | from ..common import * 7 | 8 | # 9 | # Worked well but not perfect. 
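# How get_url() below assembles the CDN link, on a made-up episode dict
# ('create_time' is a millisecond timestamp; its date becomes the URL path):
#
#   >>> get_url({'create_time': '1446000000000', 'id': '12345'})
#   'http://cdn5.lizhi.fm/audio/2015/10/28/12345_hd.mp3'
#
# (the exact date in the path depends on the local timezone)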
10 | # TODO: add option --format={sd|hd} 11 | # 12 | def get_url(ep): 13 | readable = datetime.datetime.fromtimestamp(int(ep['create_time']) / 1000).strftime('%Y/%m/%d') 14 | return 'http://cdn5.lizhi.fm/audio/{}/{}_hd.mp3'.format(readable, ep['id']) 15 | 16 | # radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ 17 | # 18 | # Returns a list of tuples (audio_id, title, url) for each episode 19 | # (audio) in the radio playlist. url is the direct link to the audio 20 | # file. 21 | def lizhi_extract_playlist_info(radio_id): 22 | # /api/radio_audios API parameters: 23 | # 24 | # - s: starting episode 25 | # - l: count (per page) 26 | # - band: radio_id 27 | # 28 | # We use l=65535 for poor man's pagination (that is, no pagination 29 | # at all -- hope all fits on a single page). 30 | # 31 | # TODO: Use /api/radio?band={radio_id} to get number of episodes 32 | # (au_cnt), then handle pagination properly. 33 | api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id 34 | api_response = json.loads(get_content(api_url)) 35 | return [(ep['id'], ep['name'], get_url(ep)) for ep in api_response] 36 | 37 | def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): 38 | filetype, ext, size = url_info(url) 39 | print_info(site_info, title, filetype, size) 40 | if not info_only: 41 | download_urls([url], title, ext, size, output_dir=output_dir) 42 | 43 | def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs): 44 | # Sample URL: http://www.lizhi.fm/549759/ 45 | radio_id = match1(url,r'/(\d+)') 46 | if not radio_id: 47 | raise NotImplementedError('%s not supported' % url) 48 | for audio_id, title, url in lizhi_extract_playlist_info(radio_id): 49 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 50 | 51 | def lizhi_download(url, output_dir='.', info_only=False, **kwargs): 52 | # Sample URL: http://www.lizhi.fm/549759/18864883431656710/ 53 | m = re.search(r'/(?P\d+)/(?P\d+)', url) 54 | if not m: 55 | raise NotImplementedError('%s not supported' % url) 56 | radio_id = m.group('radio_id') 57 | audio_id = m.group('audio_id') 58 | # Look for the audio_id among the full list of episodes 59 | for aid, title, url in lizhi_extract_playlist_info(radio_id): 60 | if aid == audio_id: 61 | lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) 62 | break 63 | else: 64 | raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id)) 65 | 66 | site_info = "lizhi.fm" 67 | download = lizhi_download 68 | download_playlist = lizhi_download_playlist 69 | -------------------------------------------------------------------------------- /src/you_get/extractors/longzhu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['longzhu_download'] 4 | 5 | import json 6 | from ..common import ( 7 | get_content, 8 | general_m3u8_extractor, 9 | match1, 10 | print_info, 11 | download_urls, 12 | playlist_not_supported, 13 | ) 14 | from ..common import player 15 | 16 | def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): 17 | web_domain = url.split('/')[2] 18 | if (web_domain == 'star.longzhu.com') or (web_domain == 'y.longzhu.com'): 19 | domain = url.split('/')[3].split('?')[0] 20 | m_url = 'http://m.longzhu.com/{0}'.format(domain) 21 | m_html = get_content(m_url) 22 | room_id_patt = r'var\s*roomId\s*=\s*(\d+);' 23 | room_id = match1(m_html,room_id_patt) 24 | 25 | json_url = 
'http://liveapi.plu.cn/liveapp/roomstatus?roomId={0}'.format(room_id) 26 | content = get_content(json_url) 27 | data = json.loads(content) 28 | streamUri = data['streamUri'] 29 | if len(streamUri) <= 4: 30 | raise ValueError('The live stream is not online!') 31 | title = data['title'] 32 | streamer = data['userName'] 33 | title = '{}: {}'.format(streamer, title) 34 | 35 | steam_api_url = 'http://livestream.plu.cn/live/getlivePlayurl?roomId={0}'.format(room_id) 36 | content = get_content(steam_api_url) 37 | data = json.loads(content) 38 | isonline = data.get('isTransfer') 39 | if isonline == '0': 40 | raise ValueError('The live stream is not online!') 41 | 42 | real_url = data['playLines'][0]['urls'][0]['securityUrl'] 43 | 44 | print_info(site_info, title, 'flv', float('inf')) 45 | 46 | if not info_only: 47 | download_urls([real_url], title, 'flv', None, output_dir, merge=merge) 48 | 49 | elif web_domain == 'replay.longzhu.com': 50 | videoid = match1(url, r'(\d+)$') 51 | json_url = 'http://liveapi.longzhu.com/livereplay/getreplayfordisplay?videoId={0}'.format(videoid) 52 | content = get_content(json_url) 53 | data = json.loads(content) 54 | 55 | username = data['userName'] 56 | title = data['title'] 57 | title = '{}:{}'.format(username, title) 58 | real_url = data['videoUrl'] 59 | 60 | if player: 61 | print_info('Longzhu Video', title, 'm3u8', 0) 62 | download_urls([real_url], title, 'm3u8', 0, output_dir, merge=merge) 63 | else: 64 | urls = general_m3u8_extractor(real_url) 65 | print_info('Longzhu Video', title, 'm3u8', 0) 66 | if not info_only: 67 | download_urls(urls, title, 'ts', 0, output_dir=output_dir, merge=merge, **kwargs) 68 | 69 | else: 70 | raise ValueError('Wrong url or unsupported link ... {0}'.format(url)) 71 | 72 | site_info = 'longzhu.com' 73 | download = longzhu_download 74 | download_playlist = playlist_not_supported('longzhu') 75 | -------------------------------------------------------------------------------- /src/you_get/extractors/lrts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['lrts_download'] 4 | 5 | import logging 6 | from ..common import * 7 | from ..util import log, term 8 | 9 | def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | html = get_html(url) 11 | args = kwargs.get('args') 12 | if not args: args = {} 13 | matched = re.search(r"/book/(\d+)", url) 14 | if not matched: 15 | raise AssertionError("not found book number: %s" % url) 16 | book_no = matched.group(1) 17 | book_title = book_no 18 | matched = re.search(r"<title>([^-]*)[-](.*)[,](.*)</title>", html) 19 | if matched: 20 | book_title = matched.group(1) 21 | 22 | matched = re.search(r"var totalCount='(\d+)'", html) 23 | if not matched: 24 | raise AssertionError("not found total count in html") 25 | total_count = int(matched.group(1)) 26 | log.i('%s total: %s' % (book_title, total_count)) 27 | first_page = 0 28 | if ('first' in args and args.first!= None): 29 | first_page = int(args.first) 30 | 31 | page_size = 10 32 | if ('page_size' in args and args.page_size != None): 33 | page_size = int(args.page_size) 34 | last_page = (total_count // page_size) + 1 35 | if ('last' in args and args.last != None): 36 | last_page = int(args.last) 37 | 38 | log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) 39 | headers = { 40 | 'Referer': url 41 | } 42 | items = [] 43 | for page in range(first_page, last_page): 44 | page_url = 'http://www.lrts.me/ajax/book/%s/%s/%s' % (book_no, page, 
page_size) 45 | response_content = json.loads(post_content(page_url, headers)) 46 | if response_content['status'] != 'success': 47 | raise AssertionError("got the page failed: %s" % (page_url)) 48 | data = response_content['data']['data'] 49 | if data: 50 | for i in data: 51 | i['resName'] = parse.unquote(i['resName']) 52 | items.extend(data) 53 | else: 54 | break 55 | headers = { 56 | 'Referer': 'http://www.lrts.me/playlist' 57 | } 58 | 59 | for item in items: 60 | i_url = 'http://www.lrts.me/ajax/path/4/%s/%s' % (item['fatherResId'], item['resId']) 61 | response_content = json.loads(post_content(i_url, headers)) 62 | if response_content['status'] == 'success' and response_content['data']: 63 | item['ok'] = True 64 | item['url'] = response_content['data'] 65 | logging.debug('ok') 66 | 67 | items = list(filter(lambda i: 'ok' in i and i['ok'], items)) 68 | log.i('Downloading %s: %s count ...' % (book_title, len(items))) 69 | 70 | for item in items: 71 | title = item['resName'] 72 | file_url = item['url'] 73 | # if not file_url: continue 74 | _, _, size = url_info(file_url) 75 | print_info(site_info, title, 'mp3', size) 76 | if not info_only: 77 | download_urls([file_url], title, 'mp3', size, output_dir, merge=merge) 78 | 79 | site_info = "lrts.me" 80 | download = lrts_download 81 | download_playlist = lrts_download 82 | -------------------------------------------------------------------------------- /src/you_get/extractors/magisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['magisto_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | 11 | video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) 12 | api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) 13 | content = get_html(api_url) 14 | data = json.loads(content) 15 | title1 = data['title'] 16 | title2 = data['creator'] 17 | title = "%s - %s" % (title1, title2) 18 | url = data['video_direct_url'] 19 | type, ext, size = url_info(url) 20 | 21 | print_info(site_info, title, type, size) 22 | if not info_only: 23 | download_urls([url], title, ext, size, output_dir, merge=merge) 24 | 25 | site_info = "Magisto.com" 26 | download = magisto_download 27 | download_playlist = playlist_not_supported('magisto') 28 | -------------------------------------------------------------------------------- /src/you_get/extractors/metacafe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['metacafe_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from urllib.parse import unquote 8 | 9 | def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.metacafe.com/watch/\w+', url): 11 | html =get_content(url) 12 | title = r1(r'list 43 | Convert XML to URL List. 44 | From Biligrab. 
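Illustrative input (only the tags read below matter):
    <video>
        <durl><url>http://example.com/0.flv</url></durl>
        <durl><url>http://example.com/1.flv</url></durl>
    </video>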
45 | """ 46 | rawurl = [] 47 | dom = parseString(xml_data) 48 | for node in dom.getElementsByTagName('durl'): 49 | url = node.getElementsByTagName('url')[0] 50 | rawurl.append(url.childNodes[0].data) 51 | return rawurl 52 | 53 | site_info = "MioMio.tv" 54 | download = miomio_download 55 | download_playlist = playlist_not_supported('miomio') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/mixcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['mixcloud_download'] 4 | 5 | from ..common import * 6 | 7 | def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | html = get_html(url, faker=True) 9 | title = r1(r'(.*?)").split("|")[:-2])) 16 | 17 | # mgid%3Auma%3Avideo%3Amtv81.com%3A897974 18 | vid = match1(html, r'getTheVideo\("(.*?)"') 19 | xml = parseString( 20 | get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid))) 21 | 22 | url = sorted( 23 | map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")), 24 | key=lambda x: int(match1(x, r'_(\d+?)_')))[-1] 25 | 26 | mediatype, ext, size = 'mp4', 'mp4', 0 27 | print_info(site_info, title, mediatype, size) 28 | # 29 | # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf 30 | # 31 | # because rtmpdump is unstable,may try several times 32 | # 33 | if not info_only: 34 | # import pdb 35 | # pdb.set_trace() 36 | download_rtmp_url(url=url, title=title, ext=ext, params={ 37 | "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir) 38 | 39 | 40 | site_info = "mtv81.com" 41 | download = mtv81_download 42 | download_playlist = playlist_not_supported('mtv81') 43 | -------------------------------------------------------------------------------- /src/you_get/extractors/nanagogo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nanagogo_download'] 4 | 5 | from ..common import * 6 | from .universal import * 7 | 8 | def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | if re.match(r'https?://stat.7gogo.jp', url): 10 | universal_download(url, output_dir, merge=merge, info_only=info_only) 11 | return 12 | 13 | talk_id = r1(r'7gogo.jp/([^/]+)/', url) 14 | post_id = r1(r'7gogo.jp/[^/]+/(\d+)', url) 15 | title = '%s_%s' % (talk_id, post_id) 16 | api_url = 'https://api.7gogo.jp/web/v2/talks/%s/posts/%s' % (talk_id, post_id) 17 | info = json.loads(get_content(api_url)) 18 | 19 | items = [] 20 | if info['data']['posts']['post'] is None: 21 | return 22 | if info['data']['posts']['post']['body'] is None: 23 | return 24 | for i in info['data']['posts']['post']['body']: 25 | if 'image' in i: 26 | image_url = i['image'] 27 | if image_url[:2] == '//': continue # skip stamp images 28 | _, ext, size = url_info(image_url) 29 | items.append({'title': title, 30 | 'url': image_url, 31 | 'ext': ext, 32 | 'size': size}) 33 | elif 'movieUrlHq' in i: 34 | movie_url = i['movieUrlHq'] 35 | _, ext, size = url_info(movie_url) 36 | items.append({'title': title, 37 | 'url': movie_url, 38 | 'ext': ext, 39 | 'size': size}) 40 | 41 | 
size = sum([i['size'] for i in items]) 42 | if size == 0: return # do not fail the whole process 43 | print_info(site_info, title, ext, size) 44 | if not info_only: 45 | for i in items: 46 | print_info(site_info, i['title'], i['ext'], i['size']) 47 | download_urls([i['url']], i['title'], i['ext'], i['size'], 48 | output_dir=output_dir, 49 | merge=merge) 50 | 51 | site_info = "7gogo.jp" 52 | download = nanagogo_download 53 | download_playlist = playlist_not_supported('nanagogo') 54 | -------------------------------------------------------------------------------- /src/you_get/extractors/naver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import urllib.parse 5 | import json 6 | import re 7 | 8 | from ..util import log 9 | from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size 10 | from .universal import * 11 | 12 | __all__ = ['naver_download_by_url'] 13 | 14 | 15 | def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs): 16 | ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}' 17 | page = get_content(url) 18 | try: 19 | vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1) 20 | key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1) 21 | meta_str = get_content(ep.format(vid, key)) 22 | meta_json = json.loads(meta_str) 23 | if 'errorCode' in meta_json: 24 | log.wtf(meta_json['errorCode']) 25 | title = meta_json['meta']['subject'] 26 | videos = meta_json['videos']['list'] 27 | video_list = sorted(videos, key=lambda video: video['encodingOption']['width']) 28 | video_url = video_list[-1]['source'] 29 | # size = video_list[-1]['size'] 30 | # result wrong size 31 | size = url_size(video_url) 32 | print_info(site_info, title, 'mp4', size) 33 | if not info_only: 34 | download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) 35 | except: 36 | universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) 37 | 38 | site_info = "naver.com" 39 | download = naver_download_by_url 40 | download_playlist = playlist_not_supported('naver') 41 | -------------------------------------------------------------------------------- /src/you_get/extractors/nicovideo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['nicovideo_download'] 4 | 5 | from ..common import * 6 | 7 | def nicovideo_login(user, password): 8 | data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In" 9 | response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8'))) 10 | return response.headers 11 | 12 | def nicovideo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 13 | import ssl 14 | ssl_context = request.HTTPSHandler( 15 | context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) 16 | cookie_handler = request.HTTPCookieProcessor() 17 | opener = request.build_opener(ssl_context, cookie_handler) 18 | request.install_opener(opener) 19 | 20 | import netrc, getpass 21 | try: 22 | info = netrc.netrc().authenticators('nicovideo') 23 | except: 24 | info = None 25 | if info is None: 26 | user = input("User: ") 27 | password = getpass.getpass("Password: ") 28 | else: 29 | user, password = info[0], info[2] 30 | print("Logging in...") 31 | nicovideo_login(user, password) 32 | 33 | html = get_html(url) # necessary! 
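# (presumably what makes the fetch above necessary: it attaches the login
# cookies to the installed opener session that the getflv API call below
# relies on)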
34 | title = r1(r'(.+?)', html) 35 | #title = unicodize(r1(r']*>([^<]+)', html)) 36 | 37 | vid = url.split('/')[-1].split('?')[0] 38 | api_html = get_html('http://flapi.nicovideo.jp/api/getflv?v=%s' % vid) 39 | real_url = parse.unquote(r1(r'url=([^&]+)&', api_html)) 40 | 41 | type, ext, size = url_info(real_url) 42 | 43 | print_info(site_info, title, type, size) 44 | if not info_only: 45 | download_urls([real_url], title, ext, size, output_dir, merge = merge) 46 | 47 | site_info = "Nicovideo.jp" 48 | download = nicovideo_download 49 | download_playlist = playlist_not_supported('nicovideo') 50 | -------------------------------------------------------------------------------- /src/you_get/extractors/pinterest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | from ..extractor import VideoExtractor 5 | 6 | class Pinterest(VideoExtractor): 7 | # site name 8 | name = "Pinterest" 9 | 10 | # ordered list of supported stream types / qualities on this site 11 | # order: high quality -> low quality 12 | stream_types = [ 13 | {'id': 'original'}, # contains an 'id' or 'itag' field at minimum 14 | {'id': 'small'}, 15 | ] 16 | 17 | def prepare(self, **kwargs): 18 | # scrape the html 19 | content = get_content(self.url) 20 | 21 | # extract title 22 | self.title = match1(content, 23 | r' 0: 65 | for k , v in content['data']['rtmp_multi_bitrate'].items(): 66 | stream_available[k] = rtmp_url + '/' + v 67 | 68 | for s in self.stream_types: 69 | if s['id'] in stream_available.keys(): 70 | quality_id = s['id'] 71 | url = stream_available[quality_id] 72 | self.streams[quality_id] = { 73 | 'container': 'flv', 74 | 'video_profile': s['video_profile'], 75 | 'size': 0, 76 | 'url': url 77 | } 78 | 79 | def extract(self, **kwargs): 80 | for i in self.streams: 81 | s = self.streams[i] 82 | s['src'] = [s['url']] 83 | if 'stream_id' in kwargs and kwargs['stream_id']: 84 | # Extract the stream 85 | stream_id = kwargs['stream_id'] 86 | 87 | if stream_id not in self.streams: 88 | log.e('[Error] Invalid video format.') 89 | log.e('Run \'-i\' command with no specific video format to view all available formats.') 90 | exit(2) 91 | else: 92 | # Extract stream with the best quality 93 | stream_id = self.streams_sorted[0]['id'] 94 | s['src'] = [s['url']] 95 | 96 | site = QiE() 97 | download = site.download_by_url 98 | download_playlist = playlist_not_supported('QiE') 99 | -------------------------------------------------------------------------------- /src/you_get/extractors/qie_video.py: -------------------------------------------------------------------------------- 1 | from ..common import * 2 | from ..extractor import VideoExtractor 3 | from ..util.log import * 4 | 5 | import json 6 | import math 7 | 8 | class QieVideo(VideoExtractor): 9 | name = 'QiE Video' 10 | vid_patt = r'"stream_name":"(\d+)"' 11 | title_patt = r'"title":"([^\"]+)"' 12 | cdn = 'http://qietv-play.wcs.8686c.com/' 13 | ep = 'http://api.qiecdn.com/api/v1/video/stream/{}' 14 | stream_types = [ 15 | {'id':'1080p', 'video_profile':'1920x1080', 'container':'m3u8'}, 16 | {'id':'720p', 'video_profile':'1280x720', 'container':'m3u8'}, 17 | {'id':'480p', 'video_profile':'853x480', 'container':'m3u8'} 18 | ] 19 | 20 | def get_vid_from_url(self): 21 | hit = re.search(self.__class__.vid_patt, self.page) 22 | if hit is None: 23 | log.wtf('Cannot get stream_id') 24 | return hit.group(1) 25 | 26 | def get_title(self): 27 | hit = re.search(self.__class__.title_patt, self.page) 
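# fall back to the numeric stream id as the title when the page carries none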
28 | if hit is None: 29 | return self.vid 30 | return hit.group(1).strip() 31 | 32 | def prepare(self, **kwargs): 33 | self.page = get_content(self.url) 34 | if self.vid is None: 35 | self.vid = self.get_vid_from_url() 36 | self.title = self.get_title() 37 | meta = json.loads(get_content(self.__class__.ep.format(self.vid))) 38 | if meta['code'] != 200: 39 | log.wtf(meta['message']) 40 | for video in meta['result']['videos']: 41 | height = video['height'] 42 | url = self.__class__.cdn + video['key'] 43 | stream_meta = dict(m3u8_url=url, size=0, container='m3u8') 44 | video_profile = '{}x{}'.format(video['width'], video['height']) 45 | stream_meta['video_profile'] = video_profile 46 | for stream_type in self.__class__.stream_types: 47 | if height // 10 == int(stream_type['id'][:-1]) // 10: 48 | # width 481, 482... 489 are all 480p here 49 | stream_id = stream_type['id'] 50 | self.streams[stream_id] = stream_meta 51 | 52 | def extract(self, **kwargs): 53 | for stream_id in self.streams: 54 | self.streams[stream_id]['src'], dur = general_m3u8_extractor(self.streams[stream_id]['m3u8_url']) 55 | self.streams[stream_id]['video_profile'] += ', Duration: {}s'.format(math.floor(dur)) 56 | 57 | def general_m3u8_extractor(url): 58 | dur = 0 59 | base_url = url[:url.rfind('/')] 60 | m3u8_content = get_content(url).split('\n') 61 | result = [] 62 | for line in m3u8_content: 63 | trimmed = line.strip() 64 | if len(trimmed) > 0: 65 | if trimmed.startswith('#'): 66 | if trimmed.startswith('#EXTINF'): 67 | t_str = re.search(r'(\d+\.\d+)', trimmed).group(1) 68 | dur += float(t_str) 69 | else: 70 | if trimmed.startswith('http'): 71 | result.append(trimmed) 72 | else: 73 | result.append(base_url + '/' + trimmed) 74 | return result, dur 75 | 76 | site = QieVideo() 77 | download_by_url = site.download_by_url 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/qingting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from ..common import get_content, playlist_not_supported, url_size 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | 8 | __all__ = ['qingting_download_by_url'] 9 | 10 | 11 | class Qingting(VideoExtractor): 12 | # every resource is described by its channel id and program id 13 | # so vid is tuple (channel_id, program_id) 14 | 15 | name = 'Qingting' 16 | stream_types = [ 17 | {'id': '_default'} 18 | ] 19 | 20 | ep = 'http://i.qingting.fm/wapi/channels/{}/programs/{}' 21 | file_host = 'http://od.qingting.fm/{}' 22 | mobile_pt = r'channels\/(\d+)\/programs/(\d+)' 23 | 24 | def prepare(self, **kwargs): 25 | if self.vid is None: 26 | hit = re.search(self.__class__.mobile_pt, self.url) 27 | self.vid = (hit.group(1), hit.group(2)) 28 | 29 | ep_url = self.__class__.ep.format(self.vid[0], self.vid[1]) 30 | meta = json.loads(get_content(ep_url)) 31 | 32 | if meta['code'] != 0: 33 | log.wtf(meta['message']['errormsg']) 34 | 35 | file_path = self.__class__.file_host.format(meta['data']['file_path']) 36 | self.title = meta['data']['name'] 37 | duration = str(meta['data']['duration']) + 's' 38 | 39 | self.streams['_default'] = {'src': [file_path], 'video_profile': duration, 'container': 'm4a'} 40 | 41 | def extract(self, **kwargs): 42 | self.streams['_default']['size'] = url_size(self.streams['_default']['src'][0]) 43 | 44 | 45 | def qingting_download_by_url(url, **kwargs): 46 | Qingting().download_by_url(url, **kwargs) 47 | 48 | site_info = 'Qingting' 49 | 
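# A minimal usage sketch (the channel/program ids below are made up; any URL
# matching mobile_pt above resolves the same way):
#
#   qingting_download_by_url(
#       'http://m.qingting.fm/channels/123456/programs/7891011/',
#       info_only=True)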
download = qingting_download_by_url 50 | download_playlist = playlist_not_supported('Qingting') 51 | -------------------------------------------------------------------------------- /src/you_get/extractors/qq_egame.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | 4 | from ..common import * 5 | from ..extractors import VideoExtractor 6 | from ..util import log 7 | from ..util.strings import unescape_html 8 | 9 | __all__ = ['qq_egame_download'] 10 | 11 | 12 | def qq_egame_download(url, 13 | output_dir='.', 14 | merge=True, 15 | info_only=False, 16 | **kwargs): 17 | uid = re.search('\d\d\d+', url) 18 | an_url = "https://m.egame.qq.com/live?anchorid={}&".format(uid.group(0)) 19 | page = get_content(an_url) 20 | server_data = re.search(r'window\.serverData\s*=\s*({.+?});', page) 21 | if server_data is None: 22 | log.wtf('Can not find window.server_data') 23 | json_data = json.loads(server_data.group(1)) 24 | if json_data['anchorInfo']['data']['isLive'] == 0: 25 | log.wtf('Offline...') 26 | live_info = json_data['liveInfo']['data'] 27 | title = '{}_{}'.format(live_info['profileInfo']['nickName'], 28 | live_info['videoInfo']['title']) 29 | real_url = live_info['videoInfo']['streamInfos'][0]['playUrl'] 30 | 31 | print_info(site_info, title, 'flv', float('inf')) 32 | if not info_only: 33 | download_url_ffmpeg( 34 | real_url, 35 | title, 36 | 'flv', 37 | params={}, 38 | output_dir=output_dir, 39 | merge=merge) 40 | 41 | 42 | site_info = "egame.qq.com" 43 | download = qq_egame_download 44 | download_playlist = playlist_not_supported('qq_egame') 45 | -------------------------------------------------------------------------------- /src/you_get/extractors/showroom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['showroom_download'] 4 | 5 | from ..common import * 6 | import urllib.error 7 | from json import loads 8 | from time import time, sleep 9 | 10 | #---------------------------------------------------------------------- 11 | def showroom_get_roomid_by_room_url_key(room_url_key): 12 | """str->str""" 13 | fake_headers_mobile = { 14 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 15 | 'Accept-Charset': 'UTF-8,*;q=0.5', 16 | 'Accept-Encoding': 'gzip,deflate,sdch', 17 | 'Accept-Language': 'en-US,en;q=0.8', 18 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' 19 | } 20 | webpage_url = 'https://www.showroom-live.com/' + room_url_key 21 | html = get_content(webpage_url, headers = fake_headers_mobile) 22 | roomid = match1(html, r'room\?room_id\=(\d+)') 23 | assert roomid 24 | return roomid 25 | 26 | def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs): 27 | '''Source: Android mobile''' 28 | while True: 29 | timestamp = str(int(time() * 1000)) 30 | api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp) 31 | html = get_content(api_endpoint) 32 | html = json.loads(html) 33 | #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 
'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]} 34 | if len(html) >= 1: 35 | break 36 | log.w('The live show is currently offline.') 37 | sleep(1) 38 | 39 | #This is mainly for testing the M3U FFmpeg parser so I would ignore any non-m3u ones 40 | stream_url = [i['url'] for i in html['streaming_url_list'] if i['is_default'] and i['type'] == 'hls'][0] 41 | 42 | assert stream_url 43 | 44 | #title 45 | title = '' 46 | profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id) 47 | html = loads(get_content(profile_api)) 48 | try: 49 | title = html['main_name'] 50 | except KeyError: 51 | title = 'Showroom_{room_id}'.format(room_id = room_id) 52 | 53 | type_, ext, size = url_info(stream_url) 54 | print_info(site_info, title, type_, size) 55 | if not info_only: 56 | download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir) 57 | 58 | 59 | #---------------------------------------------------------------------- 60 | def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 61 | """""" 62 | if re.match( r'(\w+)://www.showroom-live.com/([-\w]+)', url): 63 | room_url_key = match1(url, r'\w+://www.showroom-live.com/([-\w]+)') 64 | room_id = showroom_get_roomid_by_room_url_key(room_url_key) 65 | showroom_download_by_room_id(room_id, output_dir, merge, 66 | info_only) 67 | 68 | site_info = "Showroom" 69 | download = showroom_download 70 | download_playlist = playlist_not_supported('showroom') 71 | -------------------------------------------------------------------------------- /src/you_get/extractors/sina.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] 4 | 5 | from ..common import * 6 | from ..util.log import * 7 | 8 | from hashlib import md5 9 | from random import randint 10 | from time import time 11 | from xml.dom.minidom import parseString 12 | import urllib.parse 13 | 14 | def api_req(vid): 15 | rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) 16 | t = str(int('{0:b}'.format(int(time()))[:-6], 2)) 17 | k = md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t 18 | url = 'http://ask.ivideo.sina.com.cn/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, k) 19 | xml = get_content(url, headers=fake_headers) 20 | return xml 21 | 22 | def video_info(xml): 23 | video = parseString(xml).getElementsByTagName('video')[0] 24 | result = video.getElementsByTagName('result')[0] 25 | if result.firstChild.nodeValue == 'error': 26 | message = video.getElementsByTagName('message')[0] 27 | return None, message.firstChild.nodeValue, None 28 | vname = video.getElementsByTagName('vname')[0].firstChild.nodeValue 29 | durls = video.getElementsByTagName('durl') 30 | 31 | urls = [] 32 | size = 0 33 | for durl in durls: 34 | url = 
durl.getElementsByTagName('url')[0].firstChild.nodeValue 35 | seg_size = durl.getElementsByTagName('filesize')[0].firstChild.nodeValue 36 | urls.append(url) 37 | size += int(seg_size) 38 | 39 | return urls, vname, size 40 | 41 | def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): 42 | """Downloads a Sina video by its unique vid. 43 | http://video.sina.com.cn/ 44 | """ 45 | xml = api_req(vid) 46 | urls, name, size = video_info(xml) 47 | if urls is None: 48 | log.wtf(name) 49 | title = name 50 | print_info(site_info, title, 'flv', size) 51 | if not info_only: 52 | download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) 53 | 54 | def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False): 55 | """Downloads a Sina video by its unique vkey. 56 | http://video.sina.com/ 57 | """ 58 | 59 | url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey 60 | type, ext, size = url_info(url) 61 | 62 | print_info(site_info, title, 'flv', size) 63 | if not info_only: 64 | download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) 65 | 66 | def sina_zxt(url, output_dir='.', merge=True, info_only=False, **kwargs): 67 | ep = 'http://s.video.sina.com.cn/video/play?video_id=' 68 | frag = urllib.parse.urlparse(url).fragment 69 | if not frag: 70 | log.wtf('No video specified with fragment') 71 | meta = json.loads(get_content(ep + frag)) 72 | if meta['code'] != 1: 73 | # Yes they use 1 for success. 74 | log.wtf(meta['message']) 75 | title = meta['data']['title'] 76 | videos = sorted(meta['data']['videos'], key = lambda i: int(i['size'])) 77 | 78 | if len(videos) == 0: 79 | log.wtf('No video file returned by API server') 80 | 81 | vid = videos[-1]['file_id'] 82 | container = videos[-1]['type'] 83 | size = int(videos[-1]['size']) 84 | 85 | if container == 'hlv': 86 | container = 'flv' 87 | 88 | urls, _, _ = video_info(api_req(vid)) 89 | print_info(site_info, title, container, size) 90 | if not info_only: 91 | download_urls(urls, title, container, size, output_dir=output_dir, merge=merge, **kwargs) 92 | return 93 | 94 | def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 95 | """Downloads Sina videos by URL. 
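vid resolution order, as implemented below:
1. a 'vid=' query parameter in the URL;
2. 'hd_vid' / 'vid' variables scraped from the page;
3. a 'vkey' scraped from the page;
4. a numeric URL fragment ('#12345') as a last resort.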
96 | """ 97 | if 'news.sina.com.cn/zxt' in url: 98 | sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 99 | return 100 | 101 | vid = match1(url, r'vid=(\d+)') 102 | if vid is None: 103 | video_page = get_content(url) 104 | vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'') 105 | if hd_vid == '0': 106 | vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|') 107 | vid = vids[-1] 108 | 109 | if vid is None: 110 | vid = match1(video_page, r'vid:"?(\d+)"?') 111 | if vid: 112 | #title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') 113 | sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) 114 | else: 115 | vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') 116 | if vkey is None: 117 | vid = match1(url, r'#(\d+)') 118 | sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) 119 | return 120 | title = match1(video_page, r'title\s*:\s*"([^"]+)"') 121 | sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only) 122 | 123 | site_info = "Sina.com" 124 | download = sina_download 125 | download_playlist = playlist_not_supported('sina') 126 | -------------------------------------------------------------------------------- /src/you_get/extractors/sohu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sohu_download'] 4 | 5 | from ..common import * 6 | 7 | import json 8 | 9 | ''' 10 | Changelog: 11 | 1. http://tv.sohu.com/upload/swf/20150604/Main.swf 12 | new api 13 | ''' 14 | 15 | 16 | def real_url(fileName, key, ch): 17 | url = "https://data.vod.itc.cn/ip?new=" + fileName + "&num=1&key=" + key + "&ch=" + ch + "&pt=1&pg=2&prod=h5n" 18 | return json.loads(get_html(url))['servers'][0]['url'] 19 | 20 | 21 | def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs): 22 | if re.match(r'http://share.vrs.sohu.com', url): 23 | vid = r1(r'id=(\d+)', url) 24 | else: 25 | html = get_html(url) 26 | vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html) 27 | assert vid 28 | 29 | if extractor_proxy: 30 | set_proxy(tuple(extractor_proxy.split(":"))) 31 | info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) 32 | if info and info.get("data", ""): 33 | for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]: 34 | if 'data' in info: 35 | hqvid = info['data'][qtyp] 36 | else: 37 | hqvid = info[qtyp] 38 | if hqvid != 0 and hqvid != vid: 39 | info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid)) 40 | if not 'allot' in info: 41 | continue 42 | break 43 | if extractor_proxy: 44 | unset_proxy() 45 | host = info['allot'] 46 | prot = info['prot'] 47 | tvid = info['tvid'] 48 | urls = [] 49 | data = info['data'] 50 | title = data['tvName'] 51 | size = sum(data['clipsBytes']) 52 | assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) 53 | for fileName, key in zip(data['su'], data['ck']): 54 | urls.append(real_url(fileName, key, data['ch'])) 55 | # assert data['clipsURL'][0].endswith('.mp4') 56 | 57 | else: 58 | info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid)) 59 | host = info['allot'] 60 | prot = info['prot'] 61 | tvid = info['tvid'] 62 | urls = [] 63 | data = info['data'] 64 | title = data['tvName'] 65 | size = 
sum(map(int, data['clipsBytes'])) 66 | assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su']) 67 | for fileName, key in zip(data['su'], data['ck']): 68 | urls.append(real_url(fileName, key, data['ch'])) 69 | 70 | print_info(site_info, title, 'mp4', size) 71 | if not info_only: 72 | download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge) 73 | 74 | 75 | site_info = "Sohu.com" 76 | download = sohu_download 77 | download_playlist = playlist_not_supported('sohu') 78 | -------------------------------------------------------------------------------- /src/you_get/extractors/soundcloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['sndcd_download'] 4 | 5 | from ..common import * 6 | import re 7 | import json 8 | 9 | 10 | def get_sndcd_apikey(): 11 | home_page = get_content('https://soundcloud.com') 12 | js_url = re.findall(r'script crossorigin src="(.+?)">', home_page)[-1] 13 | 14 | client_id = get_content(js_url) 15 | return re.search(r'client_id:"(.+?)"', client_id).group(1) 16 | 17 | 18 | def get_resource_info(resource_url, client_id): 19 | cont = get_content(resource_url, decoded=True) 20 | 21 | x = re.escape('forEach(function(e){n(e)})}catch(e){}})},') 22 | x = re.search(r'' + x + r'(.*)\);', cont) 23 | 24 | info = json.loads(x.group(1))[-1]['data'][0] 25 | 26 | info = info['tracks'] if info.get('track_count') else [info] 27 | 28 | ids = [i['id'] for i in info if i.get('comment_count') is None] 29 | ids = list(map(str, ids)) 30 | ids_split = ['%2C'.join(ids[i:i+10]) for i in range(0, len(ids), 10)] 31 | api_url = 'https://api-v2.soundcloud.com/tracks?ids={ids}&client_id={client_id}&%5Bobject%20Object%5D=&app_version=1584348206&app_locale=en' 32 | 33 | res = [] 34 | for ids in ids_split: 35 | uri = api_url.format(ids=ids, client_id=client_id) 36 | cont = get_content(uri, decoded=True) 37 | res += json.loads(cont) 38 | 39 | res = iter(res) 40 | info = [next(res) if i.get('comment_count') is None else i for i in info] 41 | 42 | return info 43 | 44 | 45 | def sndcd_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 46 | client_id = get_sndcd_apikey() 47 | 48 | r_info = get_resource_info(url, client_id) 49 | 50 | for info in r_info: 51 | title = info['title'] 52 | metadata = info.get('publisher_metadata') 53 | 54 | transcodings = info['media']['transcodings'] 55 | sq = [i for i in transcodings if i['quality'] == 'sq'] 56 | hq = [i for i in transcodings if i['quality'] == 'hq'] 57 | # source url 58 | surl = sq[0] if hq == [] else hq[0] 59 | surl = surl['url'] 60 | 61 | uri = surl + '?client_id=' + client_id 62 | r = get_content(uri) 63 | surl = json.loads(r)['url'] 64 | 65 | m3u8 = get_content(surl) 66 | # url list 67 | urll = re.findall(r'http.*?(?=\n)', m3u8) 68 | 69 | size = urls_size(urll) 70 | print_info(site_info, title, 'audio/mpeg', size) 71 | print(end='', flush=True) 72 | 73 | if not info_only: 74 | download_urls(urll, title=title, ext='mp3', total_size=size, output_dir=output_dir, merge=True) 75 | 76 | 77 | site_info = "SoundCloud.com" 78 | download = sndcd_download 79 | download_playlist = sndcd_download 80 | -------------------------------------------------------------------------------- /src/you_get/extractors/suntv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['suntv_download'] 4 | 5 | from ..common import * 6 | import urllib 7 | import re 8 | 9 | def 
suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 10 | if re.match(r'http://www.isuntv.com/\w+', url): 11 | API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" 12 | 13 | itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') 14 | values = {"itemid" : itemid, "vodid": ""} 15 | 16 | data = str(values).replace("'", '"') 17 | data = data.encode('utf-8') 18 | req = urllib.request.Request(API_URL, data) 19 | req.add_header('AjaxPro-Method', 'ToPlay') # this header is required by the endpoint 20 | resp = urllib.request.urlopen(req) 21 | respData = resp.read() 22 | respData = respData.decode('ascii').strip('"') # the API returns the path wrapped in quotes; strip them 23 | 24 | video_url = 'http://www.isuntv.com' + str(respData) 25 | 26 | html = get_content(url, decoded=False) 27 | html = html.decode('gbk') 28 | title = match1(html, '<title>([^<]+)</title>').strip() # get rid of \r\n's 29 | 30 | size = 0 31 | type, ext, size = url_info(video_url) 32 | 33 | print_info(site_info, title, type, size) 34 | if not info_only: 35 | download_urls([video_url], title, 'mp4', size, output_dir, merge=merge) 36 | 37 | site_info = "SunTV" 38 | download = suntv_download 39 | download_playlist = playlist_not_supported('suntv') 40 | -------------------------------------------------------------------------------- /src/you_get/extractors/ted.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ted_download'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 9 | html = get_html(url) 10 | patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' 11 | metadata = json.loads('{' + match1(html, patt) + '}') 12 | title = metadata['talks'][0]['title'] 13 | nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] 14 | for quality in ['high', 'medium', 'low']: 15 | if quality in nativeDownloads: 16 | url = nativeDownloads[quality] 17 | type, ext, size = url_info(url) 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_urls([url], title, ext, size, output_dir, merge=merge) 21 | break 22 | 23 | site_info = "TED.com" 24 | download = ted_download 25 | download_playlist = playlist_not_supported('ted') 26 | -------------------------------------------------------------------------------- /src/you_get/extractors/theplatform.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..common import * 4 | 5 | def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs): 6 | smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid 7 | smil = get_content(smil_url) 8 | smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"')) 9 | smil_videos = {y: x for x, y in dict(re.findall(r'<video src="([^"]+)".+height="([^"]+)"', smil)).items()} 10 | for height in ['1080', '720', '480', '360', '240', '216']: 11 | if height in smil_videos: 12 | smil_video = smil_videos[height] 13 | break 14 | assert smil_video 15 | 16 | type, ext, size = 'mp4', 'mp4', 0 17 | 18 | print_info(site_info, title, type, size) 19 | if not info_only: 20 | download_rtmp_url(url=smil_base, title=title, ext=ext, params={"-y": ext + ':' + smil_video}, output_dir=output_dir) 21 | 22 | site_info = "thePlatform.com" 23 | download = theplatform_download_by_pid 24 | download_playlist = playlist_not_supported('theplatform') 25 | 
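# A self-contained sketch, added for illustration and not part of the original
# module: the quality-selection pattern theplatform_download_by_pid uses above
# is "walk a fixed preference list, take the first height the SMIL actually
# offered". The helper name and the sample dict in the assert are made up.
def _pick_best_height(smil_videos, preferred=('1080', '720', '480', '360', '240', '216')):
    for height in preferred:
        if height in smil_videos:
            return smil_videos[height]
    raise KeyError('no known height among %s' % sorted(smil_videos))

assert _pick_best_height({'480': 'low.mp4', '720': 'hi.mp4'}) == 'hi.mp4'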
-------------------------------------------------------------------------------- /src/you_get/extractors/tiktok.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tiktok_download'] 4 | 5 | from ..common import * 6 | 7 | def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 8 | headers = { 9 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 10 | 'Accept-Encoding': 'gzip, deflate', 11 | 'Accept': '*/*', 12 | 'Referer': 'https://www.tiktok.com/', 13 | 'Connection': 'keep-alive' # important 14 | } 15 | 16 | m = re.match('(https?://)?([^/]+)(/.*)', url) 17 | host = m.group(2) 18 | if host != 'www.tiktok.com': # non-canonical URL 19 | if host == 'vt.tiktok.com': # short URL 20 | url = get_location(url) 21 | vid = r1(r'/video/(\d+)', url) 22 | url = 'https://www.tiktok.com/@/video/%s/' % vid 23 | host = 'www.tiktok.com' 24 | else: 25 | url = m.group(3).split('?')[0] 26 | vid = url.split('/')[3] # should be a string of numbers 27 | 28 | html, set_cookie = getHttps(host, url, headers=headers) 29 | tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie) 30 | headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token 31 | 32 | data = r1(r'<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application/json">(.*?)</script>', html) 33 | info = json.loads(data) 34 | itemStruct = info['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct'] 35 | downloadAddr = itemStruct['video']['downloadAddr'] 36 | author = itemStruct['author']['uniqueId'] 37 | nickname = itemStruct['author']['nickname'] 38 | title = '%s [%s]' % (nickname or author, vid) 39 | 40 | mime, ext, size = url_info(downloadAddr, headers=headers) 41 | 42 | print_info(site_info, title, mime, size) 43 | if not info_only: 44 | download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) 45 | 46 | site_info = "TikTok.com" 47 | download = tiktok_download 48 | download_playlist = playlist_not_supported('tiktok') 49 | -------------------------------------------------------------------------------- /src/you_get/extractors/toutiao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import binascii 3 | import random 4 | from json import loads 5 | from urllib.parse import urlparse 6 | 7 | from ..common import * 8 | 9 | try: 10 | from base64 import decodebytes 11 | except ImportError: 12 | from base64 import decodestring 13 | 14 | decodebytes = decodestring 15 | 16 | __all__ = ['toutiao_download', ] 17 | 18 | 19 | def random_with_n_digits(n): 20 | return random.randint(10 ** (n - 1), (10 ** n) - 1) 21 | 22 | 23 | def sign_video_url(vid): 24 | r = str(random_with_n_digits(16)) 25 | 26 | url = 'https://ib.365yg.com/video/urls/v/1/toutiao/mp4/{vid}'.format(vid=vid) 27 | n = urlparse(url).path + '?r=' + r 28 | b_n = bytes(n, encoding="utf-8") 29 | s = binascii.crc32(b_n) 30 | aid = 1364 31 | ts = int(time.time() * 1000) 32 | return url + '?r={r}&s={s}&aid={aid}&vfrom=xgplayer&callback=axiosJsonpCallback1&_={ts}'.format(r=r, s=s, aid=aid, 33 | ts=ts) 34 | 35 | 36 | class ToutiaoVideoInfo(object): 37 | 38 | def __init__(self): 39 | self.bitrate = None 40 | self.definition = None 41 | self.size = None 42 | self.height = None 43 | self.width = None 44 | self.type = None 45 | self.url = None 46 | 47 | def __str__(self): 48 | return json.dumps(self.__dict__) 49 | 50 | 51 | def 
get_file_by_vid(video_id): 52 | vRet = [] 53 | url = sign_video_url(video_id) 54 | ret = get_content(url) 55 | ret = loads(ret[20:-1]) 56 | vlist = ret.get('data').get('video_list') 57 | if len(vlist) > 0: 58 | vInfo = vlist.get(sorted(vlist.keys(), reverse=True)[0]) 59 | vUrl = vInfo.get('main_url') 60 | vUrl = decodebytes(vUrl.encode('ascii')).decode('ascii') 61 | videoInfo = ToutiaoVideoInfo() 62 | videoInfo.bitrate = vInfo.get('bitrate') 63 | videoInfo.definition = vInfo.get('definition') 64 | videoInfo.size = vInfo.get('size') 65 | videoInfo.height = vInfo.get('vheight') 66 | videoInfo.width = vInfo.get('vwidth') 67 | videoInfo.type = vInfo.get('vtype') 68 | videoInfo.url = vUrl 69 | vRet.append(videoInfo) 70 | return vRet 71 | 72 | 73 | def toutiao_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 74 | html = get_html(url, faker=True) 75 | video_id = match1(html, r".*?videoId: '(?P<vid>.*)'") 76 | title = match1(html, '.*?<title>(?P<title>.*?)') 77 | video_file_list = get_file_by_vid(video_id) # 调api获取视频源文件 78 | type, ext, size = url_info(video_file_list[0].url, faker=True) 79 | print_info(site_info=site_info, title=title, type=type, size=size) 80 | if not info_only: 81 | download_urls([video_file_list[0].url], title, ext, size, output_dir, merge=merge, faker=True) 82 | 83 | 84 | site_info = "Toutiao.com" 85 | download = toutiao_download 86 | download_playlist = playlist_not_supported("toutiao") 87 | -------------------------------------------------------------------------------- /src/you_get/extractors/tucao.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tucao_download'] 4 | from ..common import * 5 | # import re 6 | import random 7 | import time 8 | from xml.dom import minidom 9 | #possible raw list types 10 | #1.
<li>type=tudou&vid=199687639</li>
11 | #2. <li>type=tudou&vid=199506910|</li>
12 | #3. <li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|</li>
13 | #4 may ? <li>type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|xx**type=&vid=?</li>
14 | #5. <li>type=tudou&vid=200003098|07**type=tudou&vid=200000350|08</li>
15 | #6. <li>vid=49454694&type=sina|</li>
16 | #7. <li>type=189&vid=513031813243909|</li>
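# Illustration only, not part of the original module: a minimal sketch of how
# tucao_download at the bottom of this file takes one of the raw lists above
# apart. The default sample is raw list type 5; the helper name is ours.
def _split_raw_list_example(raw_list="type=tudou&vid=200003098|07**type=tudou&vid=200000350|08"):
    """Yield (format_link, sub_title) pairs, e.g. ("type=tudou&vid=200003098", "07")."""
    for part in raw_list.split("**"):              # "**" separates multi-part videos
        format_link, sub_title = part.split("|")   # "|" separates the link from the part label
        yield format_link, sub_title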
17 | # re_pattern=re.compile(r"(type=(.+?)&(vid|file)=(.*?))[\|<]") 18 | 19 | def tucao_single_download(type_link, title, output_dir=".", merge=True, info_only=False): 20 | if "file" in type_link: 21 | url=type_link[type_link.find("file=")+5:] 22 | vtype, ext, size=url_info(url) 23 | print_info(site_info, title, vtype, size) 24 | if not info_only: 25 | download_urls([url], title, ext, size, output_dir) 26 | #fix for 189 video source, see raw list types 7 27 | elif "189" in type_link: 28 | vid = match1(type_link, r"vid=(\d+)") 29 | assert vid, "vid does not exist" 30 | url = "http://api.tucao.tv/api/down/{}".format(vid) 31 | vtype, ext, size=url_info(url) 32 | print_info(site_info, title, vtype, size) 33 | if not info_only: 34 | download_urls([url], title, ext, size, output_dir) 35 | else: 36 | u="http://www.tucao.tv/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(type_link,random.getrandbits(28),int(time.time()*1000)) 37 | xml=minidom.parseString(get_content(u)) 38 | urls=[] 39 | size=0 40 | for i in xml.getElementsByTagName("url"): 41 | urls.append(i.firstChild.nodeValue) 42 | vtype, ext, _size=url_info(i.firstChild.nodeValue) 43 | size+=_size 44 | print_info(site_info, title, vtype, size) 45 | if not info_only: 46 | download_urls(urls, title, ext, size, output_dir) 47 |
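# Illustration only, not part of the original module: a sketch of how the
# playurl endpoint in the else-branch above is keyed -- the query carries a
# pseudo-random 28-bit value rendered as 7 hex digits plus a millisecond
# timestamp. The helper name and the default sample link are ours.
def _playurl_example(type_link="type=tudou&vid=199687639"):
    return "http://www.tucao.tv/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(
        type_link, random.getrandbits(28), int(time.time() * 1000))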
48 | def tucao_download(url, output_dir=".", merge=True, info_only=False, **kwargs): 49 | html=get_content(url) 50 | title=match1(html,r'<h1 class="show_title">(.*?)<\w') 51 | #fix for raw list that vid goes before type, see raw list types 6 52 | raw_list=match1(html,r"<li>\s*(type=.+?|vid=.+?)</li>") 53 | raw_l=raw_list.split("**") 54 | if len(raw_l)==1: 55 | format_link=raw_l[0][:-1] if raw_l[0].endswith("|") else raw_l[0] 56 | tucao_single_download(format_link,title,output_dir,merge,info_only) 57 | else: 58 | for i in raw_l: 59 | format_link,sub_title=i.split("|") 60 | tucao_single_download(format_link,title+"-"+sub_title,output_dir,merge,info_only) 61 | 62 | 63 | site_info = "tucao.tv" 64 | download = tucao_download 65 | download_playlist = playlist_not_supported("tucao") 66 | -------------------------------------------------------------------------------- /src/you_get/extractors/tudou.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid'] 4 | 5 | from ..common import * 6 | from xml.dom.minidom import parseString 7 | import you_get.extractors.acfun 8 | 9 | def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): 10 | data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) 11 | temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x])) 12 | vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp]) 13 | 14 | urls = [] 15 | for vid in vids: 16 | for i in parseString(get_html('http://ct.v2.tudou.com/f?id=%s' % vid)).getElementsByTagName('f'): 17 | urls.append(i.firstChild.nodeValue.strip()) 18 | 19 | ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', urls[0]) 20 | 21 | print_info(site_info, title, ext, size) 22 | if not info_only: 23 | download_urls(urls, title, ext, size, output_dir=output_dir, merge = merge) 24 | 25 | def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False): 26 | html = get_html('http://www.tudou.com/programs/view/%s/' % id) 27 | 28 | iid = r1(r'iid\s*[:=]\s*(\S+)', html) 29 | try: 30 | title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") 31 | except AttributeError: 32 | title = '' 33 | tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) 34 | 35 | def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 36 | if 'acfun.tudou.com' in url: # acfun content on the tudou domain; delegate to the acfun extractor
37 | url = url.replace('acfun.tudou.com', 'www.acfun.tv') 38 | you_get.extractors.acfun.acfun_download(url, output_dir, 39 | merge, 40 | info_only) 41 | return #throw you back 42 | 43 | # Embedded player 44 | id = r1(r'http://www.tudou.com/v/([^/]+)/', url) 45 | if id: 46 | return tudou_download_by_id(id, title="", info_only=info_only) 47 | 48 | html = get_content(url) 49 | 50 | try: 51 | title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") 52 | assert title 53 | title = unescape_html(title) 54 | except AttributeError: 55 | title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"') 56 | if title is None: 57 | title = '' 58 | 59 | vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html) 60 | if vcode is None: 61 | vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"') 62 | if vcode: 63 | from .youku import youku_download_by_vid 64 | return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, src='tudou', **kwargs) 65 | 66 | iid = r1(r'iid\s*[:=]\s*(\d+)', html) 67 | if not iid: 68 | return tudou_download_playlist(url, output_dir, merge, info_only) 69 | 70 | tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) 71 | 72 | # obsolete? 73 | def parse_playlist(url): 74 | aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) 75 | html = get_decoded_html(url) 76 | if not aid: 77 | aid = r1(r"aid\s*[:=]\s*'(\d+)'", html) 78 | if re.match(r'http://www.tudou.com/albumcover/', url): 79 | atitle = r1(r"title\s*:\s*'([^']+)'", html) 80 | elif re.match(r'http://www.tudou.com/playlist/p/', url): 81 | atitle = r1(r'atitle\s*=\s*"([^"]+)"', html) 82 | else: 83 | raise NotImplementedError(url) 84 | assert aid 85 | assert atitle 86 | import json 87 | #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid 88 | url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid 89 | return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] 90 | 91 | def parse_plist(url): 92 | html = get_decoded_html(url) 93 | lcode = r1(r"lcode:\s*'([^']+)'", html) 94 | plist_info = json.loads(get_content('http://www.tudou.com/crp/plist.action?lcode=' + lcode)) 95 | return ([(item['kw'], item['iid']) for item in plist_info['items']]) 96 | 97 | def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): 98 | videos = parse_plist(url) 99 | for i, (title, id) in enumerate(videos): 100 | print('Processing %s of %s videos...' 
% (i + 1, len(videos))) 101 | tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) 102 | 103 | site_info = "Tudou.com" 104 | download = tudou_download 105 | download_playlist = tudou_download_playlist 106 | -------------------------------------------------------------------------------- /src/you_get/extractors/twitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['twitter_download'] 4 | 5 | from ..common import * 6 | from .universal import * 7 | 8 | def extract_m3u(source): 9 | r1 = get_content(source) 10 | s1 = re.findall(r'(/ext_tw_video/.*)', r1) 11 | s1 += re.findall(r'(/amplify_video/.*)', r1) 12 | r2 = get_content('https://video.twimg.com%s' % s1[-1]) 13 | s2 = re.findall(r'(/ext_tw_video/.*)', r2) 14 | s2 += re.findall(r'(/amplify_video/.*)', r2) 15 | return ['https://video.twimg.com%s' % i for i in s2] 16 | 17 | def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 18 | headers = { 19 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 20 | 'Accept-Encoding': 'gzip, deflate', 21 | 'Accept': '*/*' 22 | } 23 | 24 | if re.match(r'https?://pbs\.twimg\.com', url): 25 | universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) 26 | return 27 | 28 | if re.match(r'https?://mobile', url): # normalize mobile URL 29 | url = 'https://' + match1(url, r'//mobile\.(.+)') 30 | 31 | if re.match(r'https?://twitter\.com/i/moments/', url): # FIXME: moments 32 | html = get_html(url, faker=True) 33 | paths = re.findall(r'data-permalink-path="([^"]+)"', html) 34 | for path in paths: 35 | twitter_download('https://twitter.com' + path, 36 | output_dir=output_dir, 37 | merge=merge, 38 | info_only=info_only, 39 | **kwargs) 40 | return 41 | 42 | m = re.match(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url) 43 | assert m 44 | screen_name, item_id = m.group(3), m.group(4) 45 | page_title = "{} [{}]".format(screen_name, item_id) 46 | 47 | # FIXME: this API won't work for protected or nsfw contents 48 | api_url = 'https://cdn.syndication.twimg.com/tweet-result?id=%s&token=!' 
% item_id 49 | content = get_content(api_url) 50 | info = json.loads(content) 51 | 52 | author = info['user']['name'] 53 | url = 'https://twitter.com/%s/status/%s' % (info['user']['screen_name'], item_id) 54 | full_text = info['text'] 55 | 56 | if 'photos' in info: 57 | for photo in info['photos']: 58 | photo_url = photo['url'] 59 | title = item_id + '_' + photo_url.split('.')[-2].split('/')[-1] 60 | urls = [ photo_url + ':orig' ] 61 | size = urls_size(urls, headers=headers) 62 | ext = photo_url.split('.')[-1] 63 | 64 | print_info(site_info, title, ext, size) 65 | if not info_only: 66 | download_urls(urls, title, ext, size, output_dir, merge=merge) 67 | 68 | if 'video' in info: 69 | for mediaDetail in info['mediaDetails']: 70 | if 'video_info' not in mediaDetail: continue 71 | variants = mediaDetail['video_info']['variants'] 72 | variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0)) 73 | title = item_id + '_' + variants[-1]['url'].split('/')[-1].split('?')[0].split('.')[0] 74 | urls = [ variants[-1]['url'] ] 75 | size = urls_size(urls, headers=headers) 76 | mime, ext = variants[-1]['content_type'], 'mp4' 77 | 78 | print_info(site_info, title, ext, size) 79 | if not info_only: 80 | download_urls(urls, title, ext, size, output_dir, merge=merge, headers=headers) 81 | 82 | # TODO: should we deal with quoted tweets? 83 | 84 | 85 | site_info = "X.com" 86 | download = twitter_download 87 | download_playlist = playlist_not_supported('twitter') 88 | -------------------------------------------------------------------------------- /src/you_get/extractors/veoh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['veoh_download'] 4 | 5 | from ..common import * 6 | 7 | def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): 8 | '''Get item_id''' 9 | if re.match(r'http://www.veoh.com/watch/\w+', url): 10 | item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') 11 | elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): 12 | item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') 13 | else: 14 | raise NotImplementedError('Cannot find item ID') 15 | veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs) 16 | 17 | #---------------------------------------------------------------------- 18 | def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): 19 | """Source: Android mobile""" 20 | webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) 21 | 22 | #grab download URL 23 | a = get_content(webpage_url, decoded=True) 24 | url = match1(a, r'(.[^>]+?)]+?)"', video_page) 12 | 13 | for quality in ['.1080.', '.720.', '.480.', '.360.', '.240.']: 14 | for source in sources: 15 | if source.find(quality) != -1: 16 | url = source 17 | break 18 | assert url 19 | type, ext, size = url_info(url) 20 | print_info(site_info, title, type, size) 21 | 22 | return url, title, ext, size 23 | 24 | 25 | def get_video_from_user_videolist(url): 26 | ep = 'https://vk.com/al_video.php' 27 | to_post = dict(act='show', al=1, module='direct', video=re.search(r'video(\d+_\d+)', url).group(1)) 28 | page = post_content(ep, post_data=to_post) 29 | video_pt = r'(.+?)', page).group(1) 32 | mime, ext, size = url_info(url) 33 | print_info(site_info, title, mime, size) 34 | 35 | return url, title, ext, size 36 | 37 | 38 | def get_image_info(url): 39 | image_page = get_content(url) 40 | # used 
for title - vk page owner 41 | page_of = re.findall(r'Sender:
    (.[^>]+?)(.[^>]+?)Download full size', image_page) 47 | type, ext, size = url_info(image_link) 48 | print_info(site_info, title, type, size) 49 | 50 | return image_link, title, ext, size 51 | 52 | 53 | def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): 54 | link = None 55 | if re.match(r'(.+)z\=video(.+)', url): 56 | link, title, ext, size = get_video_info(url) 57 | elif re.match(r'(.+)vk\.com\/photo(.+)', url): 58 | link, title, ext, size = get_image_info(url) 59 | elif re.search(r'vk\.com\/video\d+_\d+', url): 60 | link, title, ext, size = get_video_from_user_videolist(url) 61 | else: 62 | raise NotImplementedError('Nothing to download here') 63 | 64 | if not info_only and link is not None: 65 | download_urls([link], title, ext, size, output_dir, merge=merge) 66 | 67 | 68 | site_info = "VK.com" 69 | download = vk_download 70 | download_playlist = playlist_not_supported('vk') 71 | -------------------------------------------------------------------------------- /src/you_get/extractors/w56.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['w56_download', 'w56_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | from .sohu import sohu_download 8 | 9 | import json 10 | 11 | def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): 12 | content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id)) 13 | info = content['info'] 14 | title = title or info['Subject'] 15 | assert title 16 | hd = info['hd'] 17 | assert hd in (0, 1, 2) 18 | hd_types = [['normal', 'qvga'], ['clear', 'vga'], ['super', 'wvga']][hd] 19 | files = [x for x in info['rfiles'] if x['type'] in hd_types] 20 | assert len(files) == 1 21 | size = int(files[0]['filesize']) 22 | url = files[0]['url'] + '&prod=56' 23 | ext = 'mp4' 24 | 25 | print_info(site_info, title, ext, size) 26 | if not info_only: 27 | download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) 28 | 29 | def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 30 | content = get_content(url) 31 | sohu_url = r1(r"url:\s*'([^']*)'", content) 32 | if sohu_url: 33 | sohu_download(sohu_url, output_dir, merge=merge, info_only=info_only, **kwargs) 34 | return 35 | 36 | id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) or \ 37 | r1(r'http://www.56.com/.*vid-(\w+).html', url) 38 | w56_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) 39 | 40 | site_info = "56.com" 41 | download = w56_download 42 | download_playlist = playlist_not_supported('56') 43 | -------------------------------------------------------------------------------- /src/you_get/extractors/ximalaya.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id'] 4 | 5 | from ..common import * 6 | 7 | import json 8 | import re 9 | 10 | stream_types = [ 11 | {'itag': '1', 'container': 'm4a', 'bitrate': 'default'}, 12 | {'itag': '2', 'container': 'm4a', 'bitrate': '32'}, 13 | {'itag': '3', 'container': 'm4a', 'bitrate': '64'} 14 | ] 15 | 16 | def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = False, stream_id = None): 17 | BASE_URL = 'http://www.ximalaya.com/tracks/' 18 | json_url = BASE_URL + id + '.json' 19 | json_data = json.loads(get_content(json_url, headers=fake_headers)) 20 | if 
'res' in json_data: 21 | if json_data['res'] == False: 22 | raise ValueError('Server reported id %s is invalid' % id) 23 | if 'is_paid' in json_data and json_data['is_paid']: 24 | if 'is_free' in json_data and not json_data['is_free']: 25 | raise ValueError('%s is paid item' % id) 26 | if (not title) and 'title' in json_data: 27 | title = json_data['title'] 28 | #no size data in the json. should it be calculated? 29 | size = 0 30 | url = json_data['play_path_64'] 31 | if stream_id: 32 | if stream_id == '1': 33 | url = json_data['play_path_32'] 34 | elif stream_id == '0': 35 | url = json_data['play_path'] 36 | logging.debug('ximalaya_download_by_id: %s' % url) 37 | ext = 'm4a' 38 | urls = [url] 39 | print('Site: %s' % site_info) 40 | print('title: %s' % title) 41 | if info_only: 42 | if stream_id: 43 | print_stream_info(stream_id) 44 | else: 45 | for item in range(0, len(stream_types)): 46 | print_stream_info(item) 47 | if not info_only: 48 | print('Type: MPEG-4 audio m4a') 49 | print('Size: N/A') 50 | download_urls(urls, title, ext, size, output_dir = output_dir, merge = False) 51 | 52 | def ximalaya_download(url, output_dir = '.', info_only = False, stream_id = None, **kwargs): 53 | if re.match(r'http://www\.ximalaya\.com/(\d+)/sound/(\d+)', url): 54 | id = match1(url, r'http://www\.ximalaya\.com/\d+/sound/(\d+)') 55 | else: 56 | raise NotImplementedError(url) 57 | ximalaya_download_by_id(id, output_dir = output_dir, info_only = info_only, stream_id = stream_id) 58 | 59 | def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, stream_id = None, **kwargs): 60 | if re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', playlist_url): 61 | page_content = get_content(playlist_url) 62 | pattern = re.compile(r'
<li sound_id="(\w+)"') 112 | 113 | #video_url = match1(html, r'
    ([\s\S]*)', html) 12 | total_size = 0 13 | part_urls= [] 14 | 15 | video_html = r1(r'', html) 16 | 17 | # video_guessulike = r1(r"window.xgData =([s\S'\s\.]*)\'\;[\s\S]*window.vouchData", video_html) 18 | video_url = r1(r"window.vurl = \'([s\S'\s\.]*)\'\;[\s\S]*window.imgurl", video_html) 19 | part_urls.append(video_url) 20 | ext = video_url.split('.')[-1] 21 | 22 | print_info(site_info, title, ext, total_size) 23 | if not info_only: 24 | download_urls(part_urls, title, ext, total_size, output_dir=output_dir, merge=merge) 25 | 26 | 27 | def zhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): 28 | if 'video.zhibo.tv' in url: 29 | zhibo_vedio_download(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) 30 | return 31 | 32 | # if 'v.zhibo.tv' in url: 33 | # http://v.zhibo.tv/31609372 34 | html = get_html(url) 35 | title = r1(r'([\s\S]*)', html) 36 | is_live = r1(r"window.videoIsLive=\'([s\S'\s\.]*)\'\;[\s\S]*window.resDomain", html) 37 | if is_live != "1": 38 | raise ValueError("The live stream is not online! (Errno:%s)" % is_live) 39 | 40 | match = re.search(r""" 41 | ourStreamName .*? 42 | '(.*?)' .*? 43 | rtmpHighSource .*? 44 | '(.*?)' .*? 45 | '(.*?)' 46 | """, html, re.S | re.X) 47 | real_url = match.group(3) + match.group(1) + match.group(2) 48 | 49 | print_info(site_info, title, 'flv', float('inf')) 50 | if not info_only: 51 | download_url_ffmpeg(real_url, title, 'flv', params={}, output_dir=output_dir, merge=merge) 52 | 53 | site_info = "zhibo.tv" 54 | download = zhibo_download 55 | download_playlist = playlist_not_supported('zhibo') 56 | -------------------------------------------------------------------------------- /src/you_get/extractors/zhihu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['zhihu_download', 'zhihu_download_playlist'] 4 | 5 | from ..common import * 6 | import json 7 | 8 | 9 | def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 10 | paths = url.split("/") 11 | # question or column 12 | if len(paths) < 3 and len(paths) < 6: 13 | raise TypeError("URL does not conform to specifications, Support column and question only." 14 | "Example URL: https://zhuanlan.zhihu.com/p/51669862 or " 15 | "https://www.zhihu.com/question/267782048/answer/490720324") 16 | 17 | if ("question" not in paths or "answer" not in paths) and "zhuanlan.zhihu.com" not in paths: 18 | raise TypeError("URL does not conform to specifications, Support column and question only." 19 | "Example URL: https://zhuanlan.zhihu.com/p/51669862 or " 20 | "https://www.zhihu.com/question/267782048/answer/490720324") 21 | 22 | html = get_html(url, faker=True) 23 | title = match1(html, r'data-react-helmet="true">(.*?)') 24 | for index, video_id in enumerate(matchall(html, [r''): '-', 30 | ord('['): '(', 31 | ord(']'): ')', 32 | ord('\t'): ' ', 33 | }) 34 | else: 35 | # *nix 36 | if os == 'mac': 37 | # Mac OS HFS+ 38 | text = text.translate({ 39 | ord(':'): '-', 40 | }) 41 | 42 | # Remove leading . 
43 | if text.startswith("."): 44 | text = text[1:] 45 | 46 | text = text[:80] # Trim to 80 Unicode characters long 47 | return text 48 | -------------------------------------------------------------------------------- /src/you_get/util/git.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | from ..version import __version__ 6 | 7 | def get_head(repo_path): 8 | """Get (branch, commit) from HEAD of a git repo.""" 9 | try: 10 | ref = open(os.path.join(repo_path, '.git', 'HEAD'), 'r').read().strip()[5:].split('/') 11 | branch = ref[-1] 12 | commit = open(os.path.join(repo_path, '.git', *ref), 'r').read().strip()[:7] 13 | return branch, commit 14 | except: 15 | return None 16 | 17 | def get_version(repo_path): 18 | try: 19 | version = __version__.split('.') 20 | major, minor, cn = [int(i) for i in version] 21 | p = subprocess.Popen(['git', 22 | '--git-dir', os.path.join(repo_path, '.git'), 23 | '--work-tree', repo_path, 24 | 'rev-list', 'HEAD', '--count'], 25 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 26 | raw, err = p.communicate() 27 | c_head = int(raw.decode('ascii')) 28 | q = subprocess.Popen(['git', 29 | '--git-dir', os.path.join(repo_path, '.git'), 30 | '--work-tree', repo_path, 31 | 'rev-list', 'master', '--count'], 32 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 33 | raw, err = q.communicate() 34 | c_master = int(raw.decode('ascii')) 35 | cc = c_head - c_master 36 | assert cc 37 | return '%s.%s.%s' % (major, minor, cn + cc) 38 | except: 39 | return __version__ 40 | -------------------------------------------------------------------------------- /src/you_get/util/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file is Python 2 compliant. 
3 | 4 | from ..version import script_name 5 | 6 | import os, sys 7 | 8 | TERM = os.getenv('TERM', '') 9 | IS_ANSI_TERMINAL = TERM in ( 10 | 'eterm-color', 11 | 'linux', 12 | 'screen', 13 | 'vt100', 14 | ) or TERM.startswith('xterm') 15 | 16 | # ANSI escape code 17 | # See 18 | RESET = 0 19 | BOLD = 1 20 | UNDERLINE = 4 21 | NEGATIVE = 7 22 | NO_BOLD = 21 23 | NO_UNDERLINE = 24 24 | POSITIVE = 27 25 | BLACK = 30 26 | RED = 31 27 | GREEN = 32 28 | YELLOW = 33 29 | BLUE = 34 30 | MAGENTA = 35 31 | CYAN = 36 32 | LIGHT_GRAY = 37 33 | DEFAULT = 39 34 | BLACK_BACKGROUND = 40 35 | RED_BACKGROUND = 41 36 | GREEN_BACKGROUND = 42 37 | YELLOW_BACKGROUND = 43 38 | BLUE_BACKGROUND = 44 39 | MAGENTA_BACKGROUND = 45 40 | CYAN_BACKGROUND = 46 41 | LIGHT_GRAY_BACKGROUND = 47 42 | DEFAULT_BACKGROUND = 49 43 | DARK_GRAY = 90 # xterm 44 | LIGHT_RED = 91 # xterm 45 | LIGHT_GREEN = 92 # xterm 46 | LIGHT_YELLOW = 93 # xterm 47 | LIGHT_BLUE = 94 # xterm 48 | LIGHT_MAGENTA = 95 # xterm 49 | LIGHT_CYAN = 96 # xterm 50 | WHITE = 97 # xterm 51 | DARK_GRAY_BACKGROUND = 100 # xterm 52 | LIGHT_RED_BACKGROUND = 101 # xterm 53 | LIGHT_GREEN_BACKGROUND = 102 # xterm 54 | LIGHT_YELLOW_BACKGROUND = 103 # xterm 55 | LIGHT_BLUE_BACKGROUND = 104 # xterm 56 | LIGHT_MAGENTA_BACKGROUND = 105 # xterm 57 | LIGHT_CYAN_BACKGROUND = 106 # xterm 58 | WHITE_BACKGROUND = 107 # xterm 59 | 60 | def sprint(text, *colors): 61 | """Format text with color or other effects into ANSI escaped string.""" 62 | return "\33[{}m{content}\33[{}m".format(";".join([str(color) for color in colors]), RESET, content=text) if IS_ANSI_TERMINAL and colors else text 63 | 64 | def println(text, *colors): 65 | """Print text to standard output.""" 66 | sys.stdout.write(sprint(text, *colors) + "\n") 67 | 68 | def print_err(text, *colors): 69 | """Print text to standard error.""" 70 | sys.stderr.write(sprint(text, *colors) + "\n") 71 | 72 | def print_log(text, *colors): 73 | """Print a log message to standard error.""" 74 | sys.stderr.write(sprint("{}: {}".format(script_name, text), *colors) + "\n") 75 | 76 | def i(message): 77 | """Print a normal log message.""" 78 | print_log(message) 79 | 80 | def d(message): 81 | """Print a debug log message.""" 82 | print_log(message, BLUE) 83 | 84 | def w(message): 85 | """Print a warning log message.""" 86 | print_log(message, YELLOW) 87 | 88 | def e(message, exit_code=None): 89 | """Print an error log message.""" 90 | print_log(message, YELLOW, BOLD) 91 | if exit_code is not None: 92 | sys.exit(exit_code) 93 | 94 | def wtf(message, exit_code=1): 95 | """What a Terrible Failure!""" 96 | print_log(message, RED, BOLD) 97 | if exit_code is not None: 98 | sys.exit(exit_code) 99 | 100 | def yes_or_no(message): 101 | ans = str(input('%s (y/N) ' % message)).lower().strip() 102 | return ans == 'y' 103 | -------------------------------------------------------------------------------- /src/you_get/util/os.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from platform import system 4 | 5 | def detect_os(): 6 | """Detect operating system. 
7 | """ 8 | 9 | # Inspired by: 10 | # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py 11 | 12 | syst = system().lower() 13 | os = 'unknown' 14 | 15 | if 'cygwin' in syst: 16 | os = 'cygwin' 17 | elif 'darwin' in syst: 18 | os = 'mac' 19 | elif 'linux' in syst: 20 | os = 'linux' 21 | # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423 22 | try: 23 | with open('/proc/version', 'r') as f: 24 | if 'microsoft' in f.read().lower(): 25 | os = 'wsl' 26 | except: pass 27 | elif 'windows' in syst: 28 | os = 'windows' 29 | elif 'bsd' in syst: 30 | os = 'bsd' 31 | 32 | return os 33 | -------------------------------------------------------------------------------- /src/you_get/util/strings.py: -------------------------------------------------------------------------------- 1 | try: 2 | # py 3.4 3 | from html import unescape as unescape_html 4 | except ImportError: 5 | import re 6 | from html.entities import entitydefs 7 | 8 | def unescape_html(string): 9 | '''HTML entity decode''' 10 | string = re.sub(r'&#[^;]+;', _sharp2uni, string) 11 | string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string) 12 | return string 13 | 14 | def _sharp2uni(m): 15 | '''&#...; ==> unicode''' 16 | s = m.group(0)[2:].rstrip(';;') 17 | if s.startswith('x'): 18 | return chr(int('0'+s, 16)) 19 | else: 20 | return chr(int(s)) 21 | 22 | from .fs import legitimize 23 | 24 | def get_filename(htmlstring): 25 | return legitimize(unescape_html(htmlstring)) 26 | 27 | def parameterize(string): 28 | return "'%s'" % string.replace("'", r"'\''") 29 | -------------------------------------------------------------------------------- /src/you_get/util/term.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def get_terminal_size(): 4 | """Get (width, height) of the current terminal.""" 5 | try: 6 | import fcntl, termios, struct # fcntl module only available on Unix 7 | return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) 8 | except: 9 | return (40, 80) 10 | -------------------------------------------------------------------------------- /src/you_get/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | script_name = 'you-get' 4 | __version__ = '0.4.1743' 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soimort/you-get/049548f3f3f35e67ba8d3181c71fdc71d11cf260/tests/__init__.py -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.extractors import ( 6 | imgur, 7 | magisto, 8 | youtube, 9 | missevan, 10 | acfun, 11 | bilibili, 12 | soundcloud, 13 | tiktok, 14 | twitter, 15 | miaopai 16 | ) 17 | 18 | 19 | class YouGetTests(unittest.TestCase): 20 | def test_imgur(self): 21 | imgur.download('http://imgur.com/WVLk5nD', info_only=True) 22 | imgur.download('https://imgur.com/we-should-have-listened-WVLk5nD', info_only=True) 23 | 24 | def test_magisto(self): 25 | magisto.download( 26 | 'http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA', 27 | info_only=True 28 | ) 29 | 30 | #def test_youtube(self): 31 | #youtube.download( 32 | 
# 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True 33 | #) 34 | #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) 35 | #youtube.download( 36 | # 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa 37 | # info_only=True 38 | #) 39 | #youtube.download( 40 | # 'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True 41 | #) 42 | 43 | def test_acfun(self): 44 | acfun.download('https://www.acfun.cn/v/ac44560432', info_only=True) 45 | 46 | #def test_bilibili(self): 47 | #bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True) 48 | 49 | #def test_soundcloud(self): 50 | ## single song 51 | #soundcloud.download( 52 | # 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True 53 | #) 54 | ## playlist 55 | #soundcloud.download( 56 | # 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True 57 | #) 58 | 59 | def test_tiktok(self): 60 | tiktok.download('https://www.tiktok.com/@zukky_48/video/7398162058153315605', info_only=True) 61 | tiktok.download('https://www.tiktok.com/@/video/7398162058153315605', info_only=True) 62 | tiktok.download('https://t.tiktok.com/i18n/share/video/7398162058153315605/', info_only=True) 63 | tiktok.download('https://vt.tiktok.com/ZSYKjKt6M/', info_only=True) 64 | 65 | def test_twitter(self): 66 | twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True) 67 | twitter.download('https://x.com/elonmusk/status/1530516552084234244', info_only=True) 68 | 69 | def test_weibo(self): 70 | miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True) 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.common import * 6 | 7 | class TestCommon(unittest.TestCase): 8 | 9 | def test_match1(self): 10 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') 11 | self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be']) 12 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | 5 | from you_get.util.fs import * 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_legitimize(self): 9 | self.assertEqual(legitimize("1*2", os="linux"), "1*2") 10 | self.assertEqual(legitimize("1*2", os="mac"), "1*2") 11 | self.assertEqual(legitimize("1*2", os="windows"), "1-2") 12 | self.assertEqual(legitimize("1*2", os="wsl"), "1-2") 13 | -------------------------------------------------------------------------------- /you-get: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys 3 | 4 | _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) 5 | _filepath = os.path.dirname(sys.argv[0]) 6 | sys.path.insert(1, os.path.join(_filepath, _srcdir)) 7 | 8 | if sys.version_info[0] == 3: 9 | import you_get 10 | if __name__ == '__main__': 11 | you_get.main(repo_path=_filepath) 12 | else: # Python 2 13 | from you_get.util import log 14 | log.e("[fatal] Python 3 is required!") 15 | log.wtf("try to run this script using 'python3 
you-get'.") 16 | -------------------------------------------------------------------------------- /you-get.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "you-get", 3 | "author": "Mort Yao", 4 | "author_email": "mort.yao@gmail.com", 5 | "url": "https://you-get.org/", 6 | "license": "MIT", 7 | 8 | "description": "Dumb downloader that scrapes the web", 9 | "keywords": "video download youtube youku niconico", 10 | 11 | "classifiers": [ 12 | "Development Status :: 4 - Beta", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: End Users/Desktop", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.7", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Topic :: Internet", 28 | "Topic :: Internet :: WWW/HTTP", 29 | "Topic :: Multimedia", 30 | "Topic :: Multimedia :: Graphics", 31 | "Topic :: Multimedia :: Sound/Audio", 32 | "Topic :: Multimedia :: Video", 33 | "Topic :: Utilities" 34 | ], 35 | 36 | "console_scripts": [ 37 | "you-get = you_get.__main__:main" 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /you-get.plugin.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | alias you-get="noglob python3 $(dirname $0)/you-get" 3 | alias you-vlc="noglob python3 $(dirname $0)/you-get --player vlc" 4 | --------------------------------------------------------------------------------