├── .github
    └── ISSUE_TEMPLATE.md
├── .gitignore
├── CHANGELOG.rst
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── MANIFEST.in
├── Makefile
├── README.rst
├── cykdl
    ├── __init__.py
    └── __main__.py
├── live.mk
├── pyproject.toml
├── setup.cfg
├── setup.py
├── video.mk
├── webykdl
    ├── README.rst
    ├── dbpc
    ├── dbusplayer.py
    ├── playthread.py
    └── webykdl.py
└── ykdl
    ├── __init__.py
    ├── common.py
    ├── compact.py
    ├── extractor.py
    ├── extractors
        ├── __init__.py
        ├── _byted.py
        ├── _byted_X-Bogus.js
        ├── _byted_acrawler.js
        ├── _common.py
        ├── acfun
        │   ├── __init__.py
        │   ├── acbase.py
        │   ├── bangumi.py
        │   ├── live.py
        │   └── video.py
        ├── baomihua.py
        ├── bilibili
        │   ├── __init__.py
        │   ├── bangumi.py
        │   ├── bilibase.py
        │   ├── idconvertor.py
        │   ├── live.py
        │   ├── util.py
        │   ├── vc.py
        │   └── video.py
        ├── cctv.py
        ├── douban
        │   ├── __init__.py
        │   ├── movie.py
        │   └── music.py
        ├── douyin
        │   ├── __init__.py
        │   ├── live.py
        │   └── video.py
        ├── douyu
        │   ├── __init__.py
        │   ├── crypto-js-md5.min.js
        │   ├── live.py
        │   ├── util.py
        │   └── video.py
        ├── fun.py
        ├── generalembed.py
        ├── generalsimple.py
        ├── heibaizhibo.m.js
        ├── heibaizhibo.py
        ├── huajiao
        │   ├── __init__.py
        │   ├── live.py
        │   └── video.py
        ├── huya
        │   ├── __init__.py
        │   ├── live.py
        │   └── video.py
        ├── ifeng
        │   ├── __init__.py
        │   ├── gongkaike.py
        │   ├── news.py
        │   └── video.py
        ├── iqilu.py
        ├── iqiyi
        │   ├── __init__.py
        │   ├── cmd5x.js
        │   ├── cmd5x_iqiyi3.js
        │   ├── live.py
        │   ├── util.py
        │   └── video.py
        ├── ixigua.py
        ├── joy.py
        ├── kankanews.py
        ├── ku6.py
        ├── kuwo.py
        ├── laifeng.py
        ├── le.py
        ├── lizhi.py
        ├── longzhu.py
        ├── mgtv.py
        ├── miaopai.py
        ├── netease
        │   ├── __init__.py
        │   ├── live.py
        │   ├── livecc.py
        │   ├── m3g.py
        │   ├── music
        │   │   ├── __init__.py
        │   │   ├── music.py
        │   │   ├── musicbase.py
        │   │   ├── mv.py
        │   │   └── program.py
        │   ├── openc.py
        │   └── video.py
        ├── pps.py
        ├── pptv.py
        ├── qq
        │   ├── __init__.py
        │   ├── egame.py
        │   ├── live.py
        │   └── video.py
        ├── sina
        │   ├── __init__.py
        │   ├── embed.py
        │   ├── openc.py
        │   └── video.py
        ├── singlemultimedia.py
        ├── sohu
        │   ├── __init__.py
        │   ├── my.py
        │   ├── sohubase.py
        │   └── tv.py
        ├── tudou.py
        ├── weibo.py
        ├── yinyuetai.py
        ├── yizhibo.py
        ├── youku.py
        ├── youkujs.py
        ├── zhangyu.py
        ├── zhanqi.py
        └── zhuafan.py
    ├── mediainfo.py
    ├── util
        ├── __init__.py
        ├── download.py
        ├── external.py
        ├── fs.py
        ├── http.py
        ├── human.py
        ├── kt_player.py
        ├── lazy.py
        ├── log.py
        ├── m3u8.py
        ├── match.py
        ├── rangefetch_server.py
        ├── wrap.py
        └── xml2dict.py
    └── version.py


/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | 在提交前，请确保您已经检查了以下内容!
 2 | 
 3 | - [ ] 你的问题无关 VIP 内容。**YKDL** 不会主动支持 VIP 内容，相关问题一概不予受理。
 4 | - [ ] 你确实可以在浏览器或移动端中正常播放，但不能使用 **YKDL** 下载。
 5 | - [ ] 你的 **YKDL** 为最新发布版本，或开发版本。
 6 | - [ ] 你已经阅读并按 https://github.com/SeaHOH/ykdl/wiki 中的指引进行了操作。
 7 | - [ ] 你的问题没有在 https://github.com/SeaHOH/ykdl/issues 报告，否则请在原有 issue 下报告。
 8 | - [ ] 本问题确实关于 **YKDL** 或其依赖，而不是其他项目。
 9 | 
10 | 请将错误输出粘贴在下面:
11 | 
12 | ```
13 | [在这里粘贴完整日志]
14 | ```
15 | 
16 | 如果你有其他附言，例如问题只在某个视频发生，或者是一般性讨论或者提出新功能，请在下面添加:
17 | 
18 | [在这里输入内容文本]
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /build/
 2 | /dist/
 3 | /MANIFEST
 4 | dist*
 5 | *.egg-info/
 6 | *.py[cod]
 7 | 
 8 | _*/
 9 | *_
10 | 
11 | *.bak
12 | *.download
13 | *.cmt.*
14 | *.3gp
15 | *.asf
16 | *.flv
17 | *.f4v
18 | *.lrc
19 | *.mkv
20 | *.mp3
21 | *.mp4
22 | *.mpg
23 | *.aac
24 | *.ts
25 | *.webm
26 | README.html
27 | 
28 | *.DS_Store
29 | *.swp
30 | *~
31 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | To contributors
2 | 
3 | 1. Feel free to approve/merge any extractor pull requests, including support for new sites.
4 | 
5 | 2. Feel free to git push any changes to extractors without my approval, including support for new sites.
6 | 
7 | 3. Any changes to the core must be submitted as pull requests and approved/merged by me.
8 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ==============================================
 2 | This is a copy of the MIT license.
 3 | ==============================================
 4 | Copyright (C) 2012 - 2016 Mort Yao <mort.yao@gmail.com>
 5 | Copyright (C) 2012 Boyu Guo <iambus@gmail.com>
 6 | Copyright (C) 2015 - 2021 Zhang Ning <zhangn1985@gmail.com>
 7 | Copyright (C) 2017 - 2022 SeaHOH <SeaHOH@gmail.com>
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of
10 | this software and associated documentation files (the "Software"), to deal in
11 | the Software without restriction, including without limitation the rights to
12 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
13 | of the Software, and to permit persons to whom the Software is furnished to do
14 | so, subject to the following conditions:
15 | 
16 | The above copyright notice and this permission notice shall be included in all
17 | copies or substantial portions of the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include README.rst
3 | include CHANGELOG.rst
4 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | include video.mk
2 | include live.mk
3 | 
4 | test:
5 | 
6 | test_video: test_video1 test_video2 test_video3
7 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | YouKuDownLoader
 2 | ===============
 3 | 
 4 | .. image:: https://img.shields.io/pypi/v/ykdl.svg
 5 |    :target: https://pypi.python.org/pypi/ykdl
 6 | 
 7 | 
 8 | A video downloader focused on mainland China video sites.
 9 | 
10 | Origin website: https://github.com/zhangn1985/ykdl
11 | 
12 | Later migrated to a new website: https://github.com/SeaHOH/ykdl
13 | 
14 | It has since migrated to my repository; the new website is: https://github.com/LifeActor/ykdl
15 | 
16 | This project is a fork of
17 | `you-get <https://github.com/soimort/you-get>`_ with the changes below.
18 | 
19 | - Structured source code.
20 | - Focus on China mainland video sites.
21 | - Dropped support for Python 3.4 and below
22 |   (see `#487 <https://github.com/SeaHOH/ykdl/issues/487>`_).
23 | 
24 | Simple installation guide
25 | -------------------------
26 | 
27 | There are some useful software package managers.
28 | 
29 | - **macOS/Linux**: `Homebrew <https://brew.sh/>`_
30 | - **Debian/Linux**: APT
31 | - **Windows**: `Chocolatey <https://chocolatey.org/install>`_
32 | 
33 | Step:
34 |  0. Dependencies
35 | 
36 |     | `FFmpeg <https://ffmpeg.org/>`_, for merging media files.
37 |     | `mpv <https://mpv.io/>`_, default media player (optimal compatibility).
38 | 
39 |  #. `Python 3 <https://www.python.org/downloads/>`_
40 | 
41 |  #. pip and setuptools, make sure they are updated.
42 | 
43 |     .. code-block:: console
44 | 
45 |         python3 -m ensurepip
46 |         python3 -m pip install pip --upgrade
47 |         python3 -m pip install setuptools --upgrade
48 | 
49 |  #. ykdl from PyPI or GitHub
50 | 
51 |     .. code-block:: console
52 | 
53 |         pip3 install ykdl --upgrade
54 | 
55 |     .. code-block:: console
56 | 
57 |         pip3 install https://github.com/SeaHOH/ykdl/archive/master.zip --force-reinstall --no-deps
58 |         pip3 install https://github.com/SeaHOH/ykdl/archive/master.zip --upgrade
59 | 
60 |  #. Make sure these folders are in your **PATH**; if they are not, add them.
61 | 
62 |     | **Windows**: folders of ffmpeg.exe, mpv.exe, and python.exe,
63 |                    and folder "<**PYTHONHOME**>\\Scripts"
64 |     | **others**: "~/.local/bin" or "/usr/local/bin"
65 | 
66 | Site status
67 | -----------
68 | 
69 | Please check wiki page:
70 | `sites-status <https://github.com/SeaHOH/ykdl/wiki/sites-status>`_
71 | 
72 | Bug reports, feature requests, and pull requests are welcome.
73 | 
74 | Project Ownership Transition
75 | ----------------------------
76 | 
77 | I am pleased to announce that I have taken over the ownership of the YouKuDownLoader project from @SeaHOH today. This transition marks the beginning of an exciting new chapter for the project, and I am committed to ensuring its continued development and success.
78 | 
79 | Future Plans 
80 | ----------------------------
81 | 
82 | - **Enhanced Features**: I plan to introduce new features that will improve the user experience and expand the capabilities of the downloader.
83 | - **Regular Updates**: Regular updates will be pushed to ensure compatibility with the latest video sites and technologies.
84 | - **Community Engagement**: I will actively engage with the community to gather feedback and suggestions for future enhancements.
85 | - **Bug Fixes**: Addressing existing bugs and issues to ensure a smooth and reliable user experience.
86 | 
87 | Your support and contributions are invaluable to the project's success. Together, we can make YouKuDownLoader an even more powerful tool for downloading videos from China mainland video sites.
88 | 
89 | Thank you for your continued support!
90 | 
91 | ----
92 | 
93 | *Note: For any inquiries or suggestions, please feel free to open an issue or reach out directly.*
94 | 


--------------------------------------------------------------------------------
/cykdl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LifeActor/ykdl/44cba0d088787a4c9aed62bba1ff84c7e4066c15/cykdl/__init__.py


--------------------------------------------------------------------------------
/live.mk:
--------------------------------------------------------------------------------
 1 | test_live: test_bobo test_douyu test_huomao test_longzhu test_panda test_zhanqi test_bililive test_huyalive test_lelive test_cc test_qqlive
 2 | 
 3 | PYTHON ?= python3
 4 | 
 5 | test_bobo:
 6 | 	${PYTHON} -m cykdl -i http://www.bobo.com/10003822?f=pHome.Hot_anchor.1
 7 | 
 8 | test_douyu:
 9 | 	${PYTHON} -m cykdl -i http://www.douyu.com/58428
10 | 
11 | test_huomao:
12 | 	${PYTHON} -m cykdl -i http://www.huomaotv.cn/live/845
13 | 
14 | test_longzhu:
15 | 	${PYTHON} -m cykdl -i http://star.longzhu.com/133097?from=challcontent
16 | 
17 | test_panda:
18 | 	${PYTHON} -m cykdl -i http://www.panda.tv/60995
19 | 
20 | test_zhanqi:
21 | 	${PYTHON} -m cykdl -i https://www.zhanqi.tv/naigege
22 | 
23 | test_bililive:
24 | 	${PYTHON} -m cykdl -i http://live.bilibili.com/3
25 | 
26 | test_huyalive:
27 | 	${PYTHON} -m cykdl -i http://www.huya.com/lengsimo
28 | 
29 | test_lelive:
30 | 	${PYTHON} -m cykdl -i http://live.le.com/lunbo/play/index.shtml?channel=224
31 | 
32 | test_cc:
33 | 	${PYTHON} -m cykdl -i http://cc.163.com/30348786/
34 | 
35 | test_qqlive:
36 | 	${PYTHON} -m cykdl -i http://live.qq.com/10001075
37 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | # These are the assumed default build requirements from pip:
3 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support
4 | requires = ["setuptools>=43.0.0", "wheel"]
5 | build-backend = "setuptools.build_meta"
6 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [build]
 2 | force = 0
 3 | 
 4 | [global]
 5 | verbose = 0
 6 | 
 7 | [egg_info]
 8 | tag_build = 
 9 | tag_date = 0
10 | tag_svn_revision = 0
11 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from setuptools import setup, find_packages
 4 | import os
 5 | import re
 6 | 
 7 | 
 8 | def read_file(*paths):
 9 |     with open(os.path.join(here, *paths), 'r', encoding='utf-8') as fp:
10 |         return fp.read()
11 | 
def get_version():
    '''Extract the `__version__` string from ykdl/version.py.

    Raises RuntimeError when no `__version__ = '...'` assignment is found
    at the start of a line.
    '''
    source = read_file('ykdl', 'version.py')
    found = re.search('^__version__ = [\'"]([^\'"]+)', source, re.M)
    if found is None:
        raise RuntimeError('Unable to find version string.')
    return found.group(1)
18 | 
# memo: pycryptodome is not being used now
# Hard requirements, passed to setup(install_requires=...).
REQ = [
    'm3u8>=1.0.0',
    'jsengine>=1.0.5',
    "colorama;os_name=='nt'",
]
# Optional extras, passed to setup(extras_require=...).
EXT = {
  'proxy': ['ExtProxy'],
  'br': ['BrotliCFFI'],
}
# 'net' bundles every extra declared so far (proxy + br).
EXT['net'] = sum(EXT.values(), [])
EXT['js'] = ['quickjs']
# 'all' is the de-duplicated union of every extra.  It is sorted because set
# iteration order varies between interpreter runs, which would make the
# generated package metadata non-reproducible.
EXT['all'] = sorted(set(sum(EXT.values(), [])))
32 | 
33 | here = os.path.abspath(os.path.dirname(__file__))
34 | LONGS = '\n\n'.join((
35 |     read_file('README.rst'),
36 |     *read_file('CHANGELOG.rst').split('\n\n\n')[:4],
37 |     '`See full change log '
38 |     '<https://github.com/SeaHOH/ykdl/blob/master/CHANGELOG.rst>`_.\n'
39 | ))
40 | 
41 | 
42 | setup(
43 |     name = 'ykdl',
44 |     version = get_version(),
45 |     author = 'Zhang Ning',
46 |     author_email = 'zhangn1985@gmail.com',
47 |     maintainer = 'SeaHOH',
48 |     maintainer_email = 'seahoh@gmail.com',
49 |     url = 'https://github.com/SeaHOH/ykdl',
50 |     license = 'MIT',
51 |     description = 'a video downloader written in Python',
52 |     long_description = LONGS,
53 |     keywords = 'video download youku acfun bilibili',
54 |     packages = find_packages(here),
55 |     install_requires = REQ,
56 |     extras_require = EXT,
57 |     platforms = 'any',
58 |     zip_safe = True,
59 |     package_data = {
60 |         'ykdl': ['extractors/*.js', 'extractors/*/*.js'],
61 |     },
62 |     python_requires = '>=3.5',
63 | 
64 |     classifiers = [
65 |         'Development Status :: 4 - Beta',
66 |         'Environment :: Console',
67 |         'Intended Audience :: Developers',
68 |         'Intended Audience :: End Users/Desktop',
69 |         'License :: OSI Approved :: MIT License',
70 |         'Operating System :: OS Independent',
71 |         'Programming Language :: Python',
72 |         'Programming Language :: Python :: 3',
73 |         'Programming Language :: Python :: 3.5',
74 |         'Programming Language :: Python :: 3.6',
75 |         'Programming Language :: Python :: 3.7',
76 |         'Programming Language :: Python :: 3.8',
77 |         'Programming Language :: Python :: 3.9',
78 |         'Programming Language :: Python :: 3.10',
79 |         'Topic :: Internet',
80 |         'Topic :: Internet :: WWW/HTTP',
81 |         'Topic :: Multimedia',
82 |         'Topic :: Multimedia :: Sound/Audio',
83 |         'Topic :: Multimedia :: Video',
84 |         'Topic :: Utilities'
85 |     ],
86 |     entry_points = {
87 |         'console_scripts': ['ykdl=cykdl.__main__:main']
88 |     },
89 | )
90 | 


--------------------------------------------------------------------------------
/video.mk:
--------------------------------------------------------------------------------
 1 | test_video1: test_youku test_acfun test_bilibili test_ifeng test_163m test_sohutv test_cctv test_tudou
 2 | test_video2: test_iqilu test_iqiyi test_joy test_ku6 test_kuwo test_sina test_qq test_sohumy test_baomihua
 3 | test_video3: test_xiami test_yinyuetai test_baidu test_douban test_huya test_163v test_le test_mgtv
 4 | 
 5 | PYTHON ?= python3
 6 | 
 7 | test_youku:
 8 | 	${PYTHON} -m cykdl -i http://v.youku.com/v_show/id_XMTYwMDIxNDI2MA==.html
 9 | 
10 | test_acfun:
11 | 	${PYTHON} -m cykdl -i http://www.acfun.cn/v/ac213736
12 | 
13 | test_bilibili:
14 | 	${PYTHON} -m cykdl -i http://bangumi.bilibili.com/anime/2539/play#63470
15 | 
16 | test_baomihua:
17 | 	${PYTHON} -m cykdl -i http://www.baomihua.com/user/24204_36300935
18 | 
19 | test_cctv:
20 | 	${PYTHON} -m cykdl -i http://tv.cctv.com/2016/06/08/VIDEa0Y5V5HY9MLeoVM5tcQC160608.shtml -t 300
21 | 
22 | test_ifeng:
23 | 	${PYTHON} -m cykdl -i http://v.ifeng.com/video_8632601.shtml
24 | 
25 | test_iqilu:
26 | 	${PYTHON} -m cykdl -i http://v.iqilu.com/shpd/rmxf/2016/0607/4332820.html
27 | 
28 | test_iqiyi:
29 | 	${PYTHON} -m cykdl -i http://www.iqiyi.com/v_19rrle48gg.html
30 | 
31 | test_joy:
32 | 	${PYTHON} -m cykdl -i http://www.joy.cn/video?resourceId=60239051
33 | 
34 | test_ku6:
35 | 	${PYTHON} -m cykdl -i http://www.ku6.com/video/detail?id=lfx8PD61clQ0knUJQad1R4Mbu2w
36 | 
37 | test_kuwo:
38 | 	${PYTHON} -m cykdl -i http://www.kuwo.cn/yinyue/7119332?catalog=yueku2016
39 | 
40 | test_lizhi:
41 | 	${PYTHON} -m cykdl -i http://www.lizhi.fm/202840/29101368624039686
42 | 
43 | test_sina:
44 | 	${PYTHON} -m cykdl -i 'http://video.sina.com.cn/ent/#250623748' -t 300
45 | 
46 | test_xiami:
47 | 	${PYTHON} -m cykdl -li http://www.xiami.com/album/2100285370?spm=a1z1s.3057849.0.0.hAuVwv
48 | 
49 | test_yinyuetai:
50 | 	${PYTHON} -m cykdl -i http://v.yinyuetai.com/video/2832181?f=SY-MKDT-MVSB-1
51 | 
52 | test_baidu:
53 | 	${PYTHON} -m cykdl -li http://music.baidu.com/album/266327865?pst=shoufa
54 | 
55 | test_douban:
56 | 	${PYTHON} -m cykdl -li https://music.douban.com/artists/player/?sid=660498,647629,647625,633870,622482,600594,589516,588385,583322,580114,576350
57 | 
58 | test_huya:
59 | 	${PYTHON} -m cykdl -i http://v.huya.com/play/2209082.html
60 | 
61 | test_le:
62 | 	${PYTHON} -m cykdl -i http://www.le.com/ptv/vplay/26859747.html -t 300
63 | 
64 | test_163v:
65 | 	${PYTHON} -m cykdl -i http://v.163.com/paike/VBI038VCL/VBNERA654.html
66 | 
67 | test_163m:
68 | 	${PYTHON} -m cykdl -li http://music.163.com/playlist?id=396542983
69 | 
70 | test_qq:
71 | 	${PYTHON} -m cykdl -i http://v.qq.com/cover/q/qsm7nxzwbnzc4dp.html?vid=m0305m0ur33
72 | 
73 | test_sohutv:
74 | 	${PYTHON} -m cykdl -i http://tv.sohu.com/20160607/n453456746.shtml
75 | 
76 | test_sohumy:
77 | 	${PYTHON} -m cykdl -i http://my.tv.sohu.com/pl/9090402/84077110.shtml
78 | 
79 | test_mgtv:
80 | 	${PYTHON} -m cykdl -i http://www.mgtv.com/v/2/293140/c/3269011.html
81 | 
82 | test_tudou:
83 | 	${PYTHON} -m cykdl -i http://video.tudou.com/v/XMjc2MTg1MzIzNg==.html
84 | 


--------------------------------------------------------------------------------
/webykdl/README.rst:
--------------------------------------------------------------------------------
 1 | WebYKDL 
 2 | ===============
 3 | a Flask web interface for YKDL
 4 | 
 5 | playthread: a threaded mpv player class
 6 | 
 7 | dbusplayer: a dbus interface for playthread
 8 | 
 9 | dbpc: cmdline interface for dbusplayer
10 | 
11 | webykdl: flask web interface for ykdl
12 | 


--------------------------------------------------------------------------------
/webykdl/dbpc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import sys, json, os.path
 5 | from pydbus import SessionBus
# Connect to the session bus and look up the player service by its bus name.
bus = SessionBus()

player = bus.get("github.zhangn1985.dbplay")

# No arguments: print the usage line (the loop below is then a no-op).
if len(sys.argv) == 1:
    print(sys.argv[0] + " URI")

# Send every argument to the player as its own single-entry play request.
for u in sys.argv[1:]:
    # os.path.abspath() treats the argument as a filesystem path.
    # NOTE(review): the payload only carries "urls", while Mpvplayer.run in
    # playthread.py also reads obj['ext'] -- confirm whether an "ext" entry
    # is required here.
    i = json.dumps({"urls": [os.path.abspath(u)]})
    player.play(i)
16 | 


--------------------------------------------------------------------------------
/webykdl/dbusplayer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from gi.repository import GLib
 5 | from pydbus import SessionBus
 6 | import json
 7 | 
 8 | from playthread import Mpvplayer
 9 | 
10 | loop = GLib.MainLoop()
11 | player = Mpvplayer()
12 | player.start()
13 | 
# Service object published on the session bus; every method forwards to the
# module-level `player` (an Mpvplayer thread).
# NOTE(review): the class docstring below is the D-Bus introspection XML and
# is presumably read by pydbus when the object is published -- treat it as
# code, not as documentation, and do not reword it.
class DBUSPlayerService(object):
    '''
        <node>
            <interface name='github.zhangn1985.dbplay'>
                <method name='play'>
                    <arg type='s' name='playinfo' direction='in'/>
                </method>
                <method name='stop'/>
                <method name='exit'/>
            </interface>
        </node>
    '''

    # Queue `playinfo` (a string, per the interface XML) on the player.
    def play(self, playinfo):
        player.play(playinfo)
    # Stop whatever the player is currently playing.
    def stop(self):
        player.stop()
    # Shut the player down, then leave the GLib main loop so the process ends.
    def exit(self):
        player.exit()
        loop.quit()
34 | 
35 | bus = SessionBus()
36 | bus.publish('github.zhangn1985.dbplay', DBUSPlayerService())
37 | loop.run()
38 | 


--------------------------------------------------------------------------------
/webykdl/playthread.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from threading import Thread
 4 | from time import sleep
 5 | import json
 6 | 
 7 | from ykdl.util.external import launch_player
 8 | 
 9 | 
class Mpvplayer(Thread):
    '''Background thread that plays queued items one after another.

    Items are JSON strings describing an object with the keys `urls`, `ext`
    and optionally `args`; they are handed to `launch_player` in FIFO order.
    '''

    def __init__(self):
        Thread.__init__(self)
        # Imported here so the block does not depend on a new file-level import.
        from threading import Event
        self.playlist = []
        self.name = 'mpv playback thread'
        # Handle returned by launch_player for the current playback, if any.
        self.handle = None
        self.__exit__ = False
        # Set whenever there is something new for run() to look at, so the
        # thread does not have to poll with a fixed sleep.
        self._wakeup = Event()

    def play(self, obj):
        '''Queue the JSON string `obj` for playback; always returns 0.'''
        self.playlist.append(obj)
        self._wakeup.set()
        return 0

    def stop(self):
        '''Terminate the current playback, if one was started.'''
        if self.handle:
            self.handle.terminate()

    def exit(self):
        '''Ask the thread to finish and stop the current playback.'''
        self.__exit__ = True
        # Wake run() so it notices the flag without waiting for the timeout.
        self._wakeup.set()
        self.stop()

    def run(self):
        '''Thread body: play queued items until exit() is called.'''
        import logging
        log = logging.getLogger(__name__)
        while not self.__exit__:
            if not self.playlist:
                # Idle: wait for play()/exit(); the timeout keeps the old
                # 10 second re-check as a fallback.
                self._wakeup.wait(10)
                self._wakeup.clear()
                continue
            o = self.playlist.pop(0)
            try:
                obj = json.loads(o)
                if 'args' not in obj:
                    obj['args'] = {'ua':'', 'header':'', 'title':'', 'referer':''}
                # NOTE(review): this key is set on `obj` but only obj['urls'],
                # obj['ext'] and obj['args'] are passed on below -- confirm
                # whether it was meant to reach launch_player.
                obj['play'] = False
                self.handle = launch_player(obj['urls'], obj['ext'], **obj['args'])
                self.handle.wait()
            except Exception:
                # A single bad item (invalid JSON, missing key, player launch
                # failure) must not kill the playback thread.
                log.exception('failed to play queued item: %r', o)
43 | 


--------------------------------------------------------------------------------
/webykdl/webykdl.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from flask import Flask
 5 | from flask import request
 6 | app = Flask(__name__)
 7 | 
 8 | from pydbus import SessionBus
 9 | bus = SessionBus()
# Prefer a player service that is already published on the session bus; when
# the lookup fails, fall back to an in-process playback thread.
try:
    player = bus.get('github.zhangn1985.dbplay')
except Exception:
    # `Exception` instead of a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed during start-up.
    from playthread import Mpvplayer
    player = Mpvplayer()
    player.start()
16 | 
17 | import json
18 | import types
19 | 
20 | from ykdl.common import url_to_module
21 | 
22 | 
def handle_videoinfo(info):
    '''Queue the first listed stream of `info` on the player.

    Reads `info.extra`, `info.title`, `info.streams` and
    `info.stream_types`; the stream named by `info.stream_types[0]` is
    serialized to JSON (`urls`, `ext`, `args`) and passed to `player.play`.
    '''
    # Work on a copy so adding the title does not modify the caller's
    # `info.extra` mapping.
    player_args = dict(info.extra)
    player_args['title'] = info.title
    stream = info.streams[info.stream_types[0]]
    video = json.dumps({
        'urls': stream['src'],
        'ext': stream['container'],
        'args': player_args})
    player.play(video)
32 | 
@app.route('/play', methods=['POST', 'GET'])
def play():
    '''Resolve the posted URL and queue the resulting video(s).

    POST form fields: `url` (required) and `list` (the literal string
    'True' selects the playlist parser).  Returns 'OK' on success, or the
    message of an AssertionError raised while resolving or parsing the URL.
    A GET request only returns a usage hint.
    '''
    if request.method != 'POST':
        return 'curl --data-urlencode "url=<URL>" http://IP:5000/play'

    url = request.form['url']
    # A missing field simply means "not a playlist".
    islist = request.form.get('list') == 'True'
    try:
        # url_to_module() reports a malformed URL with AssertionError too,
        # so it is resolved inside the same try block as the parser call.
        m, u = url_to_module(url)
        parser = m.parser_list if islist else m.parser
        info = parser(u)
    except AssertionError as e:
        return str(e)

    # Parsers return either a single info object or a list/generator of them.
    if isinstance(info, (types.GeneratorType, list)):
        for i in info:
            handle_videoinfo(i)
    else:
        handle_videoinfo(info)
    return 'OK'
59 | 
@app.route('/stop')
def stop():
    '''Stop the current playback via `player.stop()` and answer 'OK'.'''
    player.stop()
    return 'OK'
64 | 
@app.route('/')
def index():
    '''Return the HTML front page.

    The page holds two forms: one posting `url` (and the optional `list`
    checkbox) to /play, and one requesting /stop.
    '''
    return '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Web YKDL</title>
</head>
<body>
<style>form{float:left;}</style>
<form action="/play" method="post" target="_blank">
  输入视频网址: <input type="text" name="url" />
播放列表？<input type="checkbox" name="list" value="True" />
  <input type="submit" value="播放" />
</form>
<form action="/stop" method="get" target="_blank">
  <input type="submit" value="停止" />
</form>

</body>
</html>
'''
88 | 
89 | if __name__ == '__main__':
90 |     app.run(host='0.0.0.0')
91 | 


--------------------------------------------------------------------------------
/ykdl/__init__.py:
--------------------------------------------------------------------------------
1 | from . import compact
2 | del compact
3 | 


--------------------------------------------------------------------------------
/ykdl/common.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import logging
 3 | from importlib import import_module
 4 | 
 5 | from .util.http import get_head_response
 6 | from .util.wrap import reverse_list_dict
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
# TODO: add support to find module via mid@site[.type]

# Extractor module name -> other domain labels that it also serves.
# NOTE(review): reverse_list_dict comes from .util.wrap and presumably
# inverts this into {label: extractor name}; url_to_module() looks entries up
# as `alias[short_name]` -- confirm against its definition.
alias = reverse_list_dict({
    'acfun'     : ['aixifan'],
    'cctv'      : ['cntv'],
    'douyin'    : ['amemv', 'iesdouyin'],
    'douyu'     : ['douyutv'],
    'netease'   : ['163'],
    'qq'        : ['wetv'],
    'sina'      : ['iask'],
    'weibo'     : ['weibocdn'],
})
# Second-level labels that do not identify a site; when the second-level label
# is one of these, url_to_module() uses the third-level label instead.
exclude_list = {'com', 'net', 'org'}
24 | 
def url_to_module(url):
    '''Select the extractor that should handle `url`.

    The site name is taken from the host (second-level label, or the
    third-level one when the second is in `exclude_list`), mapped through
    `alias`, and imported as `ykdl.extractors.<name>`.  When no such module
    can be imported the fallbacks are, in order: SingleMultimedia if the
    path extension is a known media type; otherwise a HEAD request follows
    HTTP redirections and, if the URL changed, the lookup is retried once
    with the new URL; without a redirection, GeneralSimple / GeneralEmbed
    are used for `text/*` responses and SingleMultimedia for anything else.

    Returns a `(site, url)` tuple; `url` may differ from the argument.
    Raises AssertionError('wrong URL string!') when the URL cannot be
    parsed.
    '''
    redirection = False
    while True:
        if not url.startswith('http'):
            logger.warning('> url not starts with http(s) %s', url)
            logger.warning('> assume http connection!')
            url = 'http://' + url
        # Raw string: the pattern is unchanged, but escapes such as `\-` and
        # `\w` no longer count as invalid string escapes.
        url_infos = re.match(r'''(?x)
            https?://
            (                       # catch host
                    [\-\w\.]*?      # ignore
                (?:([\-\w]+)\.)?    # try catch 3rd domain
                   ([\-\w]+)\.      # catch 2nd domain
                    [\-\w]+         # top domain
            )
            (?::\d+)?               # allow port
            (?=/|$)                 # allow empty path
            (?:
                /                   # path start
                .+?                 # path & main name
                (?:\.(\w+))?        # try catch extension name
                (?:\?|\#|&|$)       # path end, '&' is used to ignore wrong query
            )?
            ''', url)
        if not url_infos:
            # Raised explicitly (not via `assert`) so the check also runs
            # under `python -O`; the exception type is kept for callers that
            # catch AssertionError.
            raise AssertionError('wrong URL string!')
        host, dm3, dm2, ext = url_infos.groups()
        logger.debug('host> %s', host)

        # Use the third-level label for generic second-level ones
        # ("example.com.cn"), falling back to dm2 when there is none.
        short_name = dm2
        if dm2 in exclude_list and dm3:
            short_name = dm3
        if short_name in alias:
            short_name = alias[short_name]
        logger.debug('short_name> %s', short_name)

        try:
            m = import_module('.'.join(['ykdl','extractors', short_name]))
            if hasattr(m, 'get_extractor'):
                site, url = m.get_extractor(url)
            else:
                site = m.site
            return site, url

        except ImportError as e:
            logger.debug('Import Error: %s', e)

            from .extractors import singlemultimedia

            if ext in singlemultimedia.extNames:
                logger.debug('> the extension name %r match multimedia types', ext)
                logger.debug('> Go SingleMultimedia')
                return singlemultimedia.site, url

            if not redirection:
                logger.debug('> Try HTTP Redirection!')
                response = get_head_response(url, max_redirections=None)

            # On the retry pass `response` is the one fetched in the first
            # pass and `url` was set from it, so this branch always returns.
            if response.url == url:
                if not redirection:
                    logger.debug('> NO HTTP Redirection')
                if response.headers.get('Content-Type', '').startswith('text/'):
                    logger.debug('> Try GeneralSimple')
                    from ykdl.extractors.generalsimple import site
                    site = site.get_proxy('parser_list', url)
                    if site:
                        return site, url
                    logger.debug('> Try GeneralEmbed')
                    return import_module('ykdl.extractors.generalembed').site, url
                else:
                    logger.debug('> Try SingleMultimedia')
                    return singlemultimedia.site, url

            logger.info('> New url: %s', response.url)
            url = response.url
            redirection = True
98 | 


--------------------------------------------------------------------------------
/ykdl/compact.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import io
  4 | import socket
  5 | import random
  6 | import inspect
  7 | import logging
  8 | import tempfile
  9 | import builtins
 10 | 
 11 | from .util.log import ColorHandler
 12 | 
 13 | 
 14 | logging.basicConfig(handlers=[ColorHandler()])
 15 | 
 16 | 
 17 | builtins.Infinity = float('inf')
 18 | 
 19 | 
 20 | if sys.version_info > (3, 13):
 21 |     import warnings
 22 |     warnings.filterwarnings('ignore',
 23 |                             'invalid escape sequence',
 24 |                             SyntaxWarning, append=True)
 25 | 
 26 | 
 27 | if sys.version_info < (3, 10):
 28 |     import types
 29 |     types.NoneType = type(None)
 30 | 
 31 | 
 32 | def bound_monkey_patch(orig, new):
 33 |     '''Monkey patch the original function with new, and bind the original
 34 |     function as its first argument, at end clear the new function from the
 35 |     module which it defined with.
 36 |     '''
 37 |     if hasattr(orig, 'orig'):
 38 |         raise ValueError(
 39 |                 'Monkey patched function can not be patched twice, please use '
 40 |                 'the attribute `orig` to get original function and patch it.')
 41 |     f = sys._getframe()
 42 |     module = f.f_globals['__name__']
 43 |     co_name = f.f_code.co_name
 44 |     argspec = str(inspect.signature(orig))
 45 |     marks = '*' * 76
 46 |     doc = new.__doc__ or ''
 47 |     doc += '''
 48 |     {marks}
 49 |     {orig.__name__}.orig{argspec}
 50 | 
 51 |     This is a bound monkey patched function via use '{module}.{co_name}',
 52 |     {orig.__name__}.orig is the original.
 53 |     '''
 54 |     if orig.__doc__:
 55 |         doc += '''{marks}
 56 | 
 57 |     {orig.__doc__}
 58 |     '''
 59 |     new.__doc__ = doc.format(**vars())
 60 |     new.orig = orig
 61 |     new = new.__get__(orig, type(new))  # bind original as the first argument
 62 |     orig.__globals__[orig.__name__] = new
 63 |     del new.__globals__[new.__name__]
 64 | 
 65 | 
# Windows-only compatibility patches: console re-encoding for old Pythons and
# a NamedTemporaryFile which does not rely on the OS delete-on-close flag.
if os.name == 'nt':

    # Re-encoding Windows cmd shell output, py35 and below

    if sys.version_info < (3, 6):
        # Re-wrap both streams with errors='ignore', so characters which can
        # not be encoded are dropped instead of raising.
        sys.stderr = io.TextIOWrapper(sys.stderr.detach(),
                                      encoding=sys.stderr.encoding,
                                      errors='ignore',
                                      line_buffering=True)
        sys.stdout = io.TextIOWrapper(sys.stdout.detach(),
                                      encoding=sys.stdout.encoding,
                                      errors='ignore',
                                      line_buffering=True)


    # Implements as general method instead of Windows primitive delete-on-close
    # which would lock the temporary files

    class _TemporaryFileCloser:
        # codes were copied from tempfile._TemporaryFileCloser
        # Only `close` and `__del__` are taken from this class; they are
        # grafted onto tempfile._TemporaryFileCloser below.
        # NOTE(review): relies on the private attributes `close_called`,
        # `file`, `delete` and `name` of the stdlib closer -- confirm they
        # still exist on every supported Python version.
        def close(self, unlink=os.unlink):
            if not self.close_called and self.file is not None:
                self.close_called = True
                try:
                    self.file.close()
                finally:
                    # remove the file even when closing it has failed
                    if self.delete:
                        unlink(self.name)
        def __del__(self):
            self.close()

    def NamedTemporaryFile(orig,
                           mode='w+b', buffering=-1, encoding=None, newline=None,
                           suffix=None, prefix='tmp', dir=None, delete=True,
                           *, errors=None):
        '''The Windows delete-on-close flag is not used, a closer is used to
        close (and delete) the temporary file, so it can be opened as shared.
        '''
        # Snapshot of the arguments; `orig` is the bound original function
        # and must not be forwarded to itself.
        kwargs = vars()
        del kwargs['orig']
        kwargs['delete'] = False  # skip setting os.O_TEMPORARY in the flags
        if sys.version_info < (3, 8):
            # the original is called without `errors` on these versions
            del kwargs['errors']
        tempfile = orig(**kwargs)
        # at here setting whether is deleted on close
        # (the local `delete` still holds the caller's value, only the
        # forwarded copy was forced to False)
        tempfile._closer.delete = tempfile.delete = delete
        return tempfile

    # Install the general closer methods, then replace the factory itself.
    tempfile._TemporaryFileCloser.close = _TemporaryFileCloser.close
    tempfile._TemporaryFileCloser.__del__ = _TemporaryFileCloser.__del__
    del _TemporaryFileCloser
    bound_monkey_patch(tempfile.NamedTemporaryFile, NamedTemporaryFile)
118 | 
119 | 
120 | # Shuffles getaddrinfo() result, that helps multi-connect to servers
121 | 
def getaddrinfo(orig, *args, **kwargs):
    '''Call the original with the given arguments, shuffle the list it
    returns in place and return that list.'''
    results = orig(*args, **kwargs)
    random.shuffle(results)
    return results
127 | 
128 | bound_monkey_patch(socket.getaddrinfo, getaddrinfo)
129 | 
130 | 
131 | #compact_dev_null = open(os.devnull, 'w')
132 | 


--------------------------------------------------------------------------------
/ykdl/extractors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LifeActor/ykdl/44cba0d088787a4c9aed62bba1ff84c7e4066c15/ykdl/extractors/__init__.py


--------------------------------------------------------------------------------
/ykdl/extractors/_byted.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
  6 | # result length is incorrect
  7 | #
  8 | #js_dom = '''
  9 | #var window = this,
 10 | #    document = {{referrer: 'http://www.douyin.com/'}},
 11 | #    location = {{href: '{url}', protocol: 'https'}},
 12 | #    navigator = {{userAgent: '{ua}'}};
 13 | #'''
 14 | #js_acrawler = None
 15 | #
 16 | #def get_acrawler_signer(url):
 17 | #    assert JSEngine, "No JS Interpreter found, can't load byted acrawler!"
 18 | #    global js_acrawler
 19 | #    if js_acrawler is None:
 20 | #        js_acrawler = get_pkgdata_str(__name__, '_byted_acrawler.js',
 21 | #                      'https://lf3-cdn-tos.bytescm.com/obj/rc-web-sdk/acrawler.js')
 22 | #
 23 | #    js_ctx = JSEngine(js_dom.format(url=url, ua=fake_headers['User-Agent']))
 24 | #    js_ctx.append(js_acrawler)
 25 | #
 26 | #    def sign(*args):
 27 | #        return js_ctx.call('byted_acrawler.sign', *args)
 28 | #
 29 | #    return sign
 30 | #
 31 | #def get_acrawler_cookies(url):
 32 | #    assert JSEngine, "No JS Interpreter found, can't load byted acrawler!"
 33 | #    install_cookie()
 34 | #    __ac_nonce = get_random_id(21)
 35 | #    _cookies['signed'] = cookies = {
 36 | #        '__ac_nonce': __ac_nonce,
 37 | #        '__ac_signature': get_acrawler_signer(url)('', __ac_nonce),
 38 | #        '__ac_referer': '__ac_blank'
 39 | #    }
 40 | #    _get_response(url, headers={'Cookie': cookies}, cache=False)
 41 | #    cookies.update(get_cookies_d(url))
 42 | #    uninstall_cookie()
 43 | #    return cookies
 44 | 
 45 | 
 46 | def generate_mstoken():
 47 |     ms = base64.b64encode(os.urandom(random.randrange(91,100))) \
 48 |                         .decode().replace('+','9').replace('/','9').rstrip('=')
 49 |     if len(ms) <= 128:
 50 |         ms += '=='
 51 |     while len(ms) < 132:
 52 |         i = random.randrange(128)
 53 |         c = random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
 54 |         p = random.choice('-_')
 55 |         ms = ms[:i] + ms[i:].replace(c, c+p, 1)
 56 |     return ms
 57 | 
 58 | def sign_xbogus(params, ua):
 59 |     assert JSEngine, "No JS Interpreter found, can't load byted X-Bogus util!"
 60 |     if not isinstance(params, str):
 61 |         params = urlencode(params)
 62 |     js_ctx = JSEngine(get_pkgdata_str(__name__, '_byted_X-Bogus.js'))
 63 |     return js_ctx.call('sign', params, ua)
 64 | 
 65 | def get_cookies_d(url):
 66 |     return {c.name: c.value
 67 |             for c in get_cookies(urlsplit(url).hostname, '/')}
 68 | 
 69 | def get_nonce_cookies():
 70 |     __ac_nonce = get_random_id(21)
 71 |     _cookies['nonce'] = cookies = {
 72 |         '__ac_nonce': __ac_nonce,
 73 |     }
 74 |     return cookies
 75 | 
 76 | def get_ttwid_cookies(url):
 77 |     install_cookie()
 78 |     _cookies['ttwid'] = cookies = {
 79 |         '__ac_nonce': get_random_id(21),
 80 |         'ttwid_date': '1'
 81 |     }
 82 |     _get_response(url, headers={'Cookie': cookies}, cache=False)
 83 |     cookies.update(get_cookies_d(url))
 84 |     uninstall_cookie()
 85 |     return cookies
 86 | 
 87 | _cookies = {}
 88 | _get_response = get_response
 89 | _get_content = get_content
 90 | 
 91 | def get_response(url, *args, **kwargs):
 92 |     if 'live.douyin.' in url:
 93 |         cookies = _cookies.get('nonce') or get_nonce_cookies(url)
 94 |     elif 'ixigua.' in url:
 95 |         cookies = _cookies.get('ttwid') or get_ttwid_cookies(url)
 96 |     kwargs.setdefault('headers', {})['Cookie'] = cookies
 97 |     return _get_response(url, *args, **kwargs)
 98 | 
 99 | def get_content(*args, **kwargs):
100 |     response = get_response(*args, **kwargs)
101 |     if kwargs.get('encoding') == 'ignore':
102 |         return response.content
103 |     return response.text
104 | 


--------------------------------------------------------------------------------
/ykdl/extractors/_common.py:
--------------------------------------------------------------------------------
'''How we do imports here.

Most of the functions/classes and variables/constants which are commonly used
in our extractors are imported here.

Don't import ALL (*) from a module unless all of its names are needed; if not
sure, import only the module itself or the attributes which are actually used.
'''
 9 | 
10 | from ..extractor import *
11 | from ..mediainfo import MediaInfo
12 | 
13 | from ..util.http import *
14 | from ..util.human import *
15 | from ..util.m3u8 import *
16 | from ..util.match import *
17 | from ..util.wrap import *
18 | from ..util.kt_player import *
19 | 
20 | from ..util.lazy import lazy_import
21 | lazy_import('from jsengine import JSEngine')
22 | del lazy_import
23 | 
24 | import os
25 | import sys
26 | import re
27 | import json
28 | import time
29 | import base64
30 | import random
31 | import functools
32 | import urllib.parse
33 | import urllib.request
34 | 
35 | from html import *
36 | from urllib.parse import *
37 | from tempfile import NamedTemporaryFile
38 | 
# Copy every public name of urllib.request which starts with 'HTTP' or ends
# with 'Handler' into this module's namespace.
g = globals()
for name in urllib.request.__all__:
    if name.startswith('HTTP') or name.endswith('Handler'):
        g[name] = urllib.request.__dict__[name]
# the helpers themselves must not stay in the module namespace
del g, name
44 | 


--------------------------------------------------------------------------------
/ykdl/extractors/acfun/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | def get_extractor(url):
 4 |     if '/bangumi/' in url:
 5 |         from . import bangumi as s
 6 |     elif '/live' in url:
 7 |         from . import live as s
 8 |     else:
 9 |         from . import video as s
10 | 
11 |     return s.site, url
12 | 


--------------------------------------------------------------------------------
/ykdl/extractors/acfun/acbase.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class AcBase(Extractor):
 7 | 
 8 |     quality_2_id = {
 9 |         2160: '4K',
10 |         1080: 'BD',
11 |          720: 'TD',
12 |          540: 'HD',
13 |          360: 'SD',
14 |          270: 'LD'
15 |     }
16 | 
17 |     def prepare(self):
18 |         info = MediaInfo(self.name)
19 | 
20 |         self.mid  # scan & check
21 |         html = get_content(self.url)
22 |         info.title, info.artist, sourceVid, data = self.get_page_info(html)
23 | 
24 |         data = json.loads(data)['adaptationSet'][0]['representation']
25 |         self.logger.debug('data:\n%s', data)
26 | 
27 |         url = random.choice(['url', 'backupUrl'])
28 |         for q in data:
29 |             quality = int(match1(q['qualityType'], '(\d+)'))
30 |             stream_id = self.quality_2_id[quality]
31 |             if q['frameRate'] > 30:
32 |                 stream_id += '-f' + str(int(q['frameRate'] + 0.1))
33 |             stream_profile = q['qualityLabel']
34 |             urls = q[url]
35 |             if not isinstance(urls, list):
36 |                 urls = [urls]
37 |             info.streams[stream_id] = {
38 |                 'container': 'm3u8',
39 |                 'profile': stream_profile,
40 |                 'src': urls
41 |             }
42 | 
43 |         return info
44 | 


--------------------------------------------------------------------------------
/ykdl/extractors/acfun/bangumi.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .acbase import AcBase
 5 | 
 6 | 
 7 | class AcBan(AcBase):
 8 | 
 9 |     name = 'AcFun 弹幕视频网 (番剧)'
10 | 
11 |     def get_page_info(self, html):
12 |         artist = None
13 |         bgmInfo = json.loads(match1(html, '(?:pageInfo|bangumiData) = ({.+?});'))
14 |         videoInfo = bgmInfo.get('currentVideoInfo')
15 |         assert videoInfo, bgmInfo.get('playErrorMessage') or "can't play this video!!"
16 | 
17 |         title = '{} - {}'.format(bgmInfo['bangumiTitle'], bgmInfo['episodeName'])
18 |         sourceVid = videoInfo['id']
19 |         m3u8Info = videoInfo.get('playInfos')
20 |         if m3u8Info:
21 |             m3u8Info = m3u8Info[0]
22 |         else:
23 |             m3u8Info = videoInfo.get('ksPlayJson')
24 | 
25 |         return title, artist, sourceVid, m3u8Info
26 | 
27 |     def format_mid(self, mid):
28 |         if not isinstance(mid, tuple):
29 |             mid = mid, None
30 |         mid = mid[:2]
31 |         if len(mid) == 1:
32 |             mid += (None, )
33 |         bid, iid = mid
34 |         assert fullmatch(bid, '(aa)?\d+')
35 |         assert not iid or fullmatch(iid, '\d+_\d+')
36 |         bid = match1(bid, '(\d+)')
37 |         if self.url is None:
38 |             if iid:
39 |                 self.url = 'https://www.acfun.cn/bangumi/aa{bid}_{iid}'.format(**vars())
40 |             else:
41 |                 self.url = 'https://www.acfun.cn/bangumi/aa{bid}'.format(**vars())
42 |         return mid
43 | 
44 |     def prepare_mid(self):
45 |         mid = matchm(self.url, '/aa(\d+)_(\d+_\d+)', '/aa(\d+)')
46 |         if mid[0]:
47 |             return mid
48 | 
49 |     def list_only(self):
50 |         bid, iid = self.mid
51 |         return bid and not iid
52 | 
53 |     def prepare_list(self):
54 |         bid, iid = self.mid
55 |         html = get_content(
56 |             'https://www.acfun.cn/bangumi/aa{bid}'.format(**vars()),
57 |             params={
58 |                 'pagelets': 'pagelet_partlist',
59 |                 'reqID': 0,
60 |                 'ajaxpipe': 1,
61 |                 't': int(time.time() * 1000)
62 |             })
63 |         iids = matchall(html, '{bid}_(\d+_\d+)'.format(**vars()))
64 |         self.set_index(iid, iids)
65 |         for iid in iids:
66 |             yield 'https://www.acfun.cn/bangumi/aa{bid}_{iid}'.format(**vars())
67 | 
68 | site = AcBan()
69 | 


--------------------------------------------------------------------------------
/ykdl/extractors/acfun/live.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from .._common import *
  4 | 
  5 | 
  6 | class AcLive(Extractor):
  7 |     name = 'AcFun 弹幕视频网 (直播)'
  8 | 
  9 |     headers = {
 10 |         'Accept': 'application/json, text/plain, */*',
 11 |         'Referer': 'https://live.acfun.cn/'
 12 |     }
 13 | 
 14 |     @staticmethod
 15 |     def profile_2_id(profile):
 16 |         p1, p2 = matchm(profile, '(\S+) ?(\d+M)?')
 17 |         id = {
 18 |             '蓝光': 'BD',
 19 |             '超清': 'TD',
 20 |             '高清': 'HD'
 21 |         }[p1]
 22 |         if p2:
 23 |             id += p2
 24 |         return id
 25 | 
 26 |     @staticmethod
 27 |     def format_mid(mid):
 28 |         mid = fullmatch(mid, '\d+')
 29 |         assert mid
 30 |         return mid
 31 | 
 32 |     def prepare_mid(self):
 33 |         return match1(self.url, '/live/(\d+)')
 34 | 
 35 |     @functools.cache
 36 |     def prepare_auth(self):
 37 |         self.mid  # scan & check
 38 |         did = 'web_{}{}{}'.format(random.randrange(1, 10),       # 9
 39 |                                   random.randrange(1, 10 ** 7),  # 9999999
 40 |                                   get_random_hex(8).upper())     # FFFFFFFF
 41 |         self.headers['Cookie'] = {'_did': did}
 42 |         data = get_response(
 43 |                 'https://id.app.acfun.cn/rest/app/visitor/login',
 44 |                 data=b'sid=acfun.api.visitor',
 45 |                 headers=self.headers
 46 |             ).json()
 47 |         assert data['result'] == 0, data['error_msg']
 48 |         return did, data['userId'], data['acfun.api.visitor_st']
 49 | 
 50 |     def prepare(self):
 51 |         info = MediaInfo(self.name, True)
 52 | 
 53 |         did, user_id, visitor_st = self.prepare_auth()
 54 |         data = get_response(
 55 |                 'https://api.kuaishouzt.com/rest/zt/live/web/startPlay',
 56 |                 params={
 57 |                     'subBiz': 'mainApp',
 58 |                     'kpn': 'ACFUN_APP',
 59 |                     'kpf': 'PC_WEB',
 60 |                     'userId': user_id,
 61 |                     'did': did,
 62 |                     'acfun.api.visitor_st': visitor_st
 63 |                 },
 64 |                 data={
 65 |                     'authorId': self.mid,
 66 |                     'pullstreamType': 'FLV'
 67 |                 },
 68 |                 headers=self.headers
 69 |             ).json()
 70 |         assert data['result'] == 1, data['error_msg']
 71 |         data = data['data']
 72 | 
 73 |         info.title = data['caption']
 74 | 
 75 |         data = json.loads(data['videoPlayRes'])
 76 |         for stream in data['liveAdaptiveManifest'][0]['adaptationSet']['representation']:
 77 |             stream_profile = stream['name']
 78 |             stream_id = self.profile_2_id(stream_profile)
 79 |             info.streams[stream_id] = {
 80 |                 'container': 'flv',
 81 |                 'profile': stream_profile,
 82 |                 'src' : [stream['url']],
 83 |                 'size': Infinity
 84 |             }
 85 | 
 86 |         data = get_response(
 87 |                 'https://live.acfun.cn/rest/pc-direct/user/userInfo',
 88 |                 params={'userId': self.mid},
 89 |                 headers=self.headers
 90 |             ).json()
 91 |         assert data['result'] == 0, data['error_msg']
 92 |         data = data['profile']
 93 | 
 94 |         info.artist = data['name']
 95 |         info.add_comment(data['signature'])
 96 | 
 97 |         return info
 98 | 
 99 | site = AcLive()
100 | 


--------------------------------------------------------------------------------
/ykdl/extractors/acfun/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .acbase import AcBase
 5 | 
 6 | 
 7 | class AcVideo(AcBase):
 8 | 
 9 |     name = 'AcFun 弹幕视频网'
10 | 
11 |     def get_page_info(self, html):
12 |         pageInfo = json.loads(match1(html, '(?:pageInfo|videoInfo) = ({.+?});'))
13 |         videoList = pageInfo['videoList']
14 |         videoInfo = pageInfo.get('currentVideoInfo')
15 |         assert videoInfo, bgmInfo.get('playErrorMessage') or "can't play this video!!"
16 | 
17 |         title = pageInfo['title']
18 |         sub_title = videoInfo['title']
19 |         artist = pageInfo['user']['name']
20 |         if sub_title not in ('noTitle', 'Part1', title) or len(videoList) > 1:
21 |             title = '{title} - {sub_title}'.format(**vars())
22 |         sourceVid = videoInfo['id']
23 | 
24 |         m3u8Info = videoInfo.get('playInfos')
25 |         if m3u8Info:
26 |             m3u8Info = m3u8Info[0]
27 |         else:
28 |             m3u8Info = videoInfo.get('ksPlayJson')
29 | 
30 |         return title, artist, sourceVid, m3u8Info
31 | 
32 |     def format_mid(self, mid):
33 |         assert fullmatch(mid, '(ac)?\d+')
34 |         mid = match1(mid, '(\d+)')
35 |         # force rebuild url for list index
36 |         self.url = 'https://www.acfun.cn/v/ac{mid}'.format(**vars())
37 |         return mid
38 | 
39 |     def prepare_mid(self):
40 |         return match1(self.url, 'v/ac(\d+)', r'\bac=(\d+)')
41 | 
42 |     def prepare_list(self):
43 |         html = get_content(self.url)
44 |         videos = ['https://www.acfun.cn' + path for path in
45 |                   matchall(html, 'href=[\'"](/v/ac[0-9_]+)[\'"] title=[\'"]')]
46 |         self.set_index(self.url, videos)
47 |         return videos
48 | 
49 | site = AcVideo()
50 | 


--------------------------------------------------------------------------------
/ykdl/extractors/baomihua.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Baomihua(Extractor):
 7 |     # https://www.baomihua.com/
 8 |     name = '爆米花（Baomihua)'
 9 | 
10 |     def prepare_mid(self):
11 |         return match1(self.url, '_(\d+)', 'm/(\d+)', 'v/(\d+)')
12 | 
13 |     def prepare(self):
14 |         info = MediaInfo(self.name)
15 | 
16 |         add_header('Referer', 'https://m.mideo.baomihua.com/')
17 |         data = get_response('https://play.baomihua.com/getvideourl.aspx',
18 |                             params={
19 |                                 'flvid': self.mid,
20 |                                 'datatype': 'json',
21 |                                 'devicetype': 'wap'
22 |                             }).json()
23 | 
24 |         info.title = data['title']
25 |         host = data['host']
26 |         stream_name = data['stream_name']
27 |         t = data['videofiletype']
28 |         size = int(data['videofilesize'])
29 | 
30 |         hls = data['ishls']
31 |         url = 'http://{host}/{hls}/{stream_name}.{t}'.format(**vars())
32 |         info.streams['current'] = {
33 |             'container': t,
34 |             'profile': 'current',
35 |             'src' : [url],
36 |             'size': size
37 |         }
38 |         return info
39 | 
40 | site = Baomihua()
41 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .util import *
 5 | 
 6 | def get_extractor(url):
 7 |     install_cookie()
 8 |     add_header('Referer', 'https://www.bilibili.com/')
 9 | 
10 |     if 'live.bilibili' in url:
11 |         from . import live as s
12 |         return s.site, url
13 |     elif 'vc.bilibili' in url:
14 |         from . import vc as s
15 |         return s.site, url
16 |     elif '/bangumi/' in url:
17 |         from . import bangumi as s
18 |         return s.site, url
19 | 
20 |     page_index = match1(url, '(?:page|\?p)=(\d+)', 'index_(\d+)\.') or '1'
21 | 
22 |     bv_id = match1(url, r'\b((?:BV|bv)[0-9A-Za-z]{10})')
23 |     if not bv_id:
24 |         av_id = match1(url, r'\b(?:av|aid=)(\d+)')
25 |         if av_id:
26 |             bv_id = av2bv(av_id)
27 | 
28 |     if bv_id:
29 |         data = get_media_data(bv_id)
30 |         forward = data.get('forward')
31 |         if forward:
32 |             from .video import site
33 |             forward = av2bv(forward)
34 |             site.logger.warning('视频撞车了! 从 %s 跳转至首发 %s', bv_id, forward)
35 |             bv_id = forward
36 |             data = get_media_data(bv_id)
37 |         url = data.get('redirect_url') or \
38 |               'https://www.bilibili.com/video/{bv_id}/'.format(**vars())
39 |     else:
40 |         url = get_location(url)
41 | 
42 |     if '/bangumi/' in url:
43 |         from . import bangumi as s
44 |     else:
45 |         if page_index > '1':
46 |             url = '{url}?p={page_index}'.format(**vars())
47 |         from . import video as s
48 | 
49 |     return s.site, url
50 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/bangumi.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .util import *
 5 | from .bilibase import BiliBase
 6 | 
 7 | 
 8 | APPKEY = '84956560bc028eb7'
 9 | SECRETKEY = '94aba54af9065f71de72f5508f1cd42e'
10 | api_url = 'https://bangumi.bilibili.com/player/web_api/v2/playurl'
11 | 
12 | ua_legacy = 'Mozilla/5.0 (X11; Linux x86_64; rv:50.1) Gecko/20100101 Firefox/50.1'
13 | 
class BiliBan(BiliBase):
    '''Bilibili bangumi extractor built on `BiliBase.prepare`.'''

    name = '哔哩哔哩 番剧 (Bilibili Bangumi)'

    def list_only(self):
        '''True for season URLs ('/play/ss'), which address no episode.'''
        return '/play/ss' in self.url

    def get_page_info(self, info):
        '''Read `__INITIAL_STATE__` of the page and fill `info` (site, title,
        artist, duration) as well as `self.mid` and `self.seasonType`.

        Raises AssertionError for episodes with status 13 (VIP).
        '''
        page = get_content(self.url, headers={'User-Agent': ua_legacy})
        state = json.loads(match1(page, '__INITIAL_STATE__=({.+?});'))

        episode = state['epInfo']
        assert episode['epStatus'] != 13, "can't play VIP video!"

        self.mid = episode['cid']
        media = state['mediaInfo']
        self.seasonType = media['ssType']
        type_format = media['ssTypeFormat']
        ss_name = type_format['name']
        # English section name, taken from the last directory of the link
        ss_name_e = type_format['homeLink'].split('/')[-2].title()
        if ss_name_e != 'Anime':
            if ss_name_e == 'Tv':
                ss_name_e = 'TV'
            info.site = '哔哩哔哩 {ss_name} (Bilibili {ss_name_e})'.format(
                         ss_name=ss_name, ss_name_e=ss_name_e)

        title_h1 = state['h1Title']
        title_share = episode['share_copy']
        # the share text is preferred whenever it contains the page title
        title = title_share if title_h1 in title_share and title_share else title_h1

        # badge: title of the episode's section, else the episode's own badge
        section_type = episode['sectionType']
        if section_type:
            badge = None
            for section in state['sections']:
                if section['type'] == section_type:
                    badge = section['title']
                    break
        else:
            badge = episode['badge']
        # only the preview badge is kept in the title
        if badge != '预告':
            badge = ''

        info.title = '{title} {badge}'.format(title=title, badge=badge)
        info.artist = mediaInfo_artist(media)
        info.duration = episode['duration'] // 1000

    def get_api_url(self, qn):
        '''Return the signed play-URL API address for quality `qn`.'''
        return sign_api_url(api_url, {
            'appkey': APPKEY,
            'cid': self.mid,
            'module': 'bangumi',
            'platform': 'html5',
            'player': 1,
            'qn': qn,
            'season_type': self.seasonType
        }, SECRETKEY)

    def prepare_list(self):
        '''Yield the URLs of all episodes which are not VIP-only (status 13),
        main list first, then the lists of the sections.'''
        page = get_content(self.url, headers={'User-Agent': ua_legacy})
        state = json.loads(match1(page, '__INITIAL_STATE__=({.+?});'))
        current = state['epInfo']['id']
        sections = state['sections']
        episodes = sum((section['epList'] for section in sections),
                       state['epList'])
        playable = [ep['id'] for ep in episodes if ep['epStatus'] != 13]

        skiped = len(episodes) - len(playable)
        if skiped:
            self.logger.info('skiped %d VIP videos', skiped)
        assert playable, "can't play VIP videos!"

        self.set_index(current, playable)
        for ep_id in playable:
            yield 'https://www.bilibili.com/bangumi/play/ep{id}'.format(id=ep_id)


def mediaInfo_artist(media):
    '''Return the uploader name from 'upInfo' or, failing that, 'up_info'.'''
    return media.get('upInfo', {}).get('name') or \
           media.get('up_info', {}).get('uname')
85 | 
86 | site = BiliBan()
87 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/bilibase.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class BiliBase(Extractor):
 7 |     format_2_type_profile = {
 8 |         'hdflv2': ('4K', '超清 4K'),       #120 IGNORE
 9 |        'flv_p60': ('BD', '高清 1080P60'),  #116 IGNORE
10 |            'flv': ('BD', '高清 1080P'),    #80
11 |     'flv720_p60': ('TD', '高清 720P'),     #74  IGNORE
12 |         'flv720': ('TD', '高清 720P'),     #64
13 |          'hdmp4': ('TD', '高清 720P'),     #48
14 |         'flv480': ('HD', '清晰 480P'),     #32
15 |            'mp4': ('SD', '流畅 360P'),     #16
16 |         'flv360': ('SD', '流畅 360P'),     #15
17 |         }
18 | 
19 |     def prepare(self):
20 |         info = MediaInfo(self.name)
21 |         info.extra.referer = 'https://www.bilibili.com/'
22 |         info.extra['ua'] = fake_headers['User-Agent']
23 | 
24 |         self.get_page_info(info)
25 | 
26 |         def get_video_info(qn=0):
27 |             # need login with high qn
28 |             if qn == 74 or qn > 80:
29 |                 return
30 | 
31 |             api_url = self.get_api_url(qn)
32 |             data = get_response(api_url).xml()['root']
33 |             assert data['result'] == 'suee', '{}: {}, {}'.format(
34 |                                   data['result'], data['code'], data['message'])
35 | 
36 |             durl = data['durl']
37 |             urls = []
38 |             size = 0
39 |             for d in durl:
40 |                 urls.append(d['url'])
41 |                 size += d['size']
42 |             fmt = data['format']
43 |             if 'mp4' in fmt:
44 |                 ext = 'mp4'
45 |             elif 'flv' in fmt:
46 |                 ext = 'flv'
47 |             st, prf = self.format_2_type_profile[fmt]
48 |             if urls:
49 |                 st += '-' + str(data['quality'])
50 |                 info.streams[st] = {
51 |                     'container': ext,
52 |                     'profile': prf,
53 |                     'src' : urls,
54 |                     'size': size
55 |                 }
56 | 
57 |             if qn == 0:
58 |                 aqlts = data['accept_quality'].split(',')
59 |                 aqlts.remove(str(data['quality']))
60 |                 for aqlt in aqlts:
61 |                     get_video_info(int(aqlt))
62 | 
63 |         get_video_info()
64 | 
65 |         assert info.streams, "can't play this video!!"
66 |         return info
67 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/idconvertor.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | '''Bilibili VID convertor, AV <=> BV.
 4 | 
 5 | Origin by mcft:
 6 |   https://www.zhihu.com/question/381784377/answer/1099438784
 7 | 
 8 | Modified by SeaHOH
 9 | '''
10 | 
11 | 
__all__ = ['bv2av', 'av2bv']

# base-58 alphabet and its reverse lookup
tablec = list('fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF')
tablei = {c: i for i, c in enumerate(tablec)}
# BV id template, the '*' positions are filled in by av2bv
bvtl = list('BV1**4*1*7**')
# positions (within the last 10 characters) of the base-58 digits, from the
# least significant one
bvco = [9, 8, 1, 6, 2, 4]  # av >= 29460791296 ( 2^35 - 2^32 - 2^29 - 2^26 )
                           # ? [9, 8, 1, 6, 2, 4, 0, 7, 3, 5]
                           # ? [9, 8, 1, 6, 2, 4, 5, 7, 3, 0]
xor = 177451812
# base-58 digits of the additive constant, least significant first
add = []
_d = 100618342136696320
while _d:
    _d, _m = divmod(_d, 58)
    add.append(_m)


def bv2av(bv):
    '''Convert a BV id to the AV number, returned as a decimal string.

    Only the last 10 characters of `bv` are read.
    '''
    tail = list(bv[-10:])
    total = 0
    for power, pos in enumerate(bvco):
        digit = tablei[tail[pos]] - add[power]
        total += digit * 58 ** power
    return str(total ^ xor)

def av2bv(av):
    '''Convert an AV number to the BV id.

    `av` is an int or a string; leading 'a'/'v' characters of a string are
    stripped.
    '''
    if isinstance(av, str):
        av = av.lstrip('av')
    chars = bvtl.copy()
    rest = int(av) ^ xor
    for power, pos in enumerate(bvco):
        # adding digit by digit, the quotient carries over to the next one
        rest, digit = divmod(rest + add[power], 58)
        chars[pos + 2] = tablec[digit]
    return ''.join(chars)
44 | 
45 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .idconvertor import *
 5 | 
 6 | 
# Names exported by ``from .util import *``; ``av2bv`` is re-exported from
# the ``idconvertor`` star-import above.
__all__ = ['av2bv', 'sign_api_url', 'get_media_data']
 8 | 
 9 | def sign_api_url(api_url, params, skey):
10 |     params = sorted(params.items())
11 |     params.append(('sign', hash.md5(urlencode(params) + skey)))
12 |     params_str = urlencode(params)
13 |     return '{api_url}?{params_str}'.format(**vars())
14 | 
def get_media_data(bvid):
    '''Fetch the web-interface "view" record for ``bvid``.

    Returns the ``data`` member of the JSON reply; raises AssertionError
    when the reply's ``code`` is not 0.
    '''
    response = get_response('https://api.bilibili.com/x/web-interface/view',
                            params={'bvid': bvid})
    payload = response.json()
    assert payload['code'] == 0, "can't play this video!!"
    return payload['data']
20 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/vc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class BiliVC(Extractor):
 7 |     name = '哔哩哔哩 小视频 (Bili VC)'
 8 | 
 9 |     def prepare(self):
10 |         return match1(self.url, 'video/(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 | 
15 |         video_data = get_response(
16 |                 'https://api.vc.bilibili.com/clip/v1/video/detail',
17 |                 params={'video_id': self.mid}).json()
18 | 
19 |         info.title = video_data['data']['item']['description']
20 |         info.artist = video_data['data']['user']['name']
21 | 
22 |         info.streams['current'] = {
23 |             'container': 'mp4',
24 |             'profile': 'current',
25 |             'src' : [video_data['data']['item']['video_playurl']],
26 |             'size': int(video_data['data']['item']['video_size'])
27 |         }
28 | 
29 |         return info
30 | 
31 | site = BiliVC()
32 | 
33 | 


--------------------------------------------------------------------------------
/ykdl/extractors/bilibili/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .util import *
 5 | from .bilibase import BiliBase
 6 | 
 7 | 
# App key / secret used by BiliVideo.get_api_url() to sign playurl requests.
APPKEY = 'iVGUTjsxvpLeuDCf'
SECRETKEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
# Endpoint the signed query string is appended to.
api_url = 'https://interface.bilibili.com/v2/playurl'
11 | 
class BiliVideo(BiliBase):
    '''Extractor for regular Bilibili video pages (av / BV ids, multi-part).'''
    name = '哔哩哔哩 (Bilibili)'

    def get_page_info(self, info):
        '''Fill ``info`` with title, artist and duration and set ``self.mid``.

        The wanted part number is read from the URL (default: part 1) and
        looked up in the ``videoData.pages`` list of the page's embedded
        ``__INITIAL_STATE__`` JSON; ``self.mid`` receives that part's cid.
        '''
        # Use the same patterns as prepare_list(). Matching only a leading
        # "?p=" missed URLs where ``p`` is not the first query parameter
        # (e.g. "?spm_id_from=x&p=2"), which silently selected part 1.
        page_index = match1(self.url, r'[^a-z]p(?:age)?=(\d+)',
                                      r'index_(\d+)\.') or '1'
        html = get_content(self.url)
        data = match1(html, '__INITIAL_STATE__=({.+?});')
        self.logger.debug('data:\n%s', data)
        data = json.loads(data)['videoData']
        title = data['title']
        pages = data['pages']
        for page in pages:
            index = str(page['page'])
            subtitle = page['part']
            if index == page_index:
                self.mid = page['cid']
                # Multi-part videos get "<title> - <n> - <part name>";
                # single-part ones only add the part name when it differs.
                if len(pages) > 1:
                    title = '{title} - {index} - {subtitle}'.format(**vars())
                elif subtitle and subtitle != title:
                    title = '{title} - {subtitle}'.format(**vars())
                info.duration = page['duration']
                break
        info.title = title
        info.artist = data['owner']['name']
        info.add_comment(data['tname'])

    def get_api_url(self, qn):
        '''Return the signed playurl API URL for ``self.mid`` at quality ``qn``.'''
        params = {
            'appkey': APPKEY,
            'cid': self.mid,
            'platform': 'html5',
            'player': 0,
            'qn': qn
        }
        return sign_api_url(api_url, params, SECRETKEY)

    def prepare_list(self):
        '''Yield the page URL of every item in the playlist of ``self.url``.

        For a "ugc_season" collection one URL per episode bvid is yielded;
        otherwise one URL per part (``?p=N``) of the video.
        '''
        vid = match1(self.url, r'/(av\d+|(?:BV|bv)[0-9A-Za-z]{10})')
        if vid[:2] == 'av':
            vid = av2bv(vid)
        data = get_media_data(vid)

        if 'ugc_season' in data:
            # Flatten the sections' episode lists in order.
            bvids = [episode['bvid']
                     for section in data['ugc_season']['sections']
                     for episode in section['episodes']]
            self.set_index(vid, bvids)
            for bvid in bvids:
                yield 'https://www.bilibili.com/video/{bvid}/'.format(**vars())

        else:
            page = int(match1(self.url, r'[^a-z]p(?:age)?=(\d+)',
                                        r'index_(\d+)\.')
                       or '1')
            self.set_index(page, data['videos'])
            for p in range(data['videos']):
                p = p + 1
                yield 'https://www.bilibili.com/video/{vid}/?p={p}'.format(**vars())

site = BiliVideo()
72 | 


--------------------------------------------------------------------------------
/ykdl/extractors/cctv.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class CNTV(Extractor):
 7 |     name = '央视网 (CNTV)'
 8 | 
 9 |     supported_chapters = [
10 |         ['chapters6',   'BD', '超高清 1080P'],
11 |         ['chapters5',   'TD', '超高清 720P'],
12 |         ['chapters4',   'TD', '超清'],
13 |         ['chapters3',   'HD', '高清'],
14 |         ['chapters2',   'SD', '标清'],
15 |         ['lowChapters', 'LD', '流畅']]
16 | 
17 |     def prepare_mid(self):
18 |         mid = match1(self.url, '(?:guid|videoCenterId)=(\w+)',
19 |                                '(\w+)/index\.shtml')
20 |         if mid is None:
21 |             html = get_content(self.url)
22 |             mid = match1(html, 'guid\s*=\s*[\'"]([^\'"]+)',
23 |                                '"videoCenterId","([^"]+)',
24 |                                'initMyAray\s*=\s*[\'"]([^\'"]+)')
25 |         return mid
26 | 
27 |     def prepare(self):
28 |         info = MediaInfo(self.name)
29 | 
30 |         data = get_response(
31 |             'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do',
32 |             params={
33 |                 'pid': self.mid,
34 |                 'tsp': int(time.time()),
35 |                 'vn' : 2054,
36 |                 'pcv': 152438790
37 |             }).json()
38 | 
39 |         info.title = '{} - {}'.format(data['title'], data['play_channel'])
40 | 
41 |         video_data = data['video']
42 |         for chapters, stream_id, stream_profile in self.supported_chapters:
43 |             stream_data = video_data.get(chapters)
44 |             if stream_data:
45 |                 urls = []
46 |                 for v in stream_data:
47 |                    urls.append(v['url'])
48 |                 info.streams[stream_id] = {
49 |                     'container': 'mp4',
50 |                     'profile': stream_profile,
51 |                     'src': urls
52 |                 }
53 |         return info
54 | 
55 | site = CNTV()
56 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douban/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'music.douban' in url and '/subject/' not in url or 'site.douban' in url:
 6 |         from . import music as s
 7 |         return s.site, url
 8 | 
 9 |     if 'movie.douban' in url and ('/trailer' in url or '/video' in url):
10 |         from . import movie as s
11 |         return s.site, url
12 | 
13 |     raise NotImplementedError(url)
14 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douban/movie.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class DoubanMovie(Extractor):
 7 |     name = 'Douban movie (豆瓣电影)'
 8 | 
 9 |     def prepare(self):
10 |         info = MediaInfo(self.name)
11 |         html = get_content(self.url)
12 |         info.title = match1(html, '<meta name="description" content='
13 |                                   '"(.+?) 在线观看"/>')
14 |         url = match1(html,'"embedUrl": "(.+?)"')
15 | 
16 |         info.streams['current'] = {
17 |             'container': 'mp4',
18 |             'profile': 'current',
19 |             'src': [url]
20 |         }
21 |         return info
22 | 
23 |     def list_only(self):
24 |         return '/subject/' in self.url
25 | 
26 |     def prepare_list(self):
27 |         html = get_content(self.url)
28 |         return matchall(html, '<a class="pr-video" href="'
29 |                               '(https://movie.douban.com/trailer/\d+/)#')
30 | 
31 | site = DoubanMovie()
32 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douban/music.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | def get_info_list(sids):
 7 |     if not sids: return
 8 | 
 9 |     data = get_response('https://music.douban.com/j/artist/playlist',
10 |                         data={
11 |                             'source' : '',
12 |                             'sids' : sids,
13 |                             'ck' : ''
14 |                         }).json()
15 | 
16 |     for song in data['songs']:
17 |         info = MediaInfo(site.name)
18 |         artist = song['artist']
19 |         info.title = song['title']
20 |         info.artist = artist['name']
21 |         info.duration = song['play_length']
22 |         info.add_comment(song['label'])
23 |         info.add_comment(artist['style'])
24 |         info.extra.referer = artist['url']
25 |         info.streams['current'] = {
26 |             'container': 'mp3',
27 |             'profile': 'current',
28 |             'src': [(artist['picture'], song['url'])],
29 |         }
30 |         yield info
31 | 
class DoubanMusic(Extractor):
    '''Extractor for douban music songs and song lists.'''
    name = 'Douban Music (豆瓣音乐)'

    def prepare_mid(self):
        '''Return the song id from the ``s=`` / ``sid=`` URL parameter.'''
        return match1(self.url, 's(?:id)?=(\d+)')

    def prepare(self):
        '''Return the MediaInfo of the single song ``self.mid``.'''
        songs = get_info_list(self.mid)
        return next(songs)

    def list_only(self):
        '''Truthy for site pages without a song id and for multi-id URLs.'''
        if 'site.douban' in self.url and not match(self.url, 's=\d+'):
            return True
        return match(self.url, 'sid=\d+,\d')

    def prepare_list(self):
        '''Collect the song ids of the page / URL and return their infos.'''
        if 'site.douban' in self.url:
            found = matchall(get_content(self.url), 'sid="(\d+)"')
        else:
            id_list = match1(self.url, 'sid=([\d,]+)') or ''
            found = matchall(id_list, '(\d+)')

        # Drop repeated ids while keeping first-seen order.
        sids = []
        seen = set()
        for sid in found:
            if sid in seen:
                continue
            seen.add(sid)
            sids.append(sid)
        self.set_index(None, sids)
        return get_info_list(','.join(sids))

site = DoubanMusic()
59 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyin/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | def get_extractor(url):
 7 |     if '/v.' in url or 'iesdouyin.' in url:
 8 |         url = get_location(url)
 9 |     if '/live.' in url or 'amemv.com' in url:
10 |         from . import live as s
11 |     else:
12 |         from . import video as s
13 | 
14 |     return s.site, url
15 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyin/live.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .. import _byted
 5 | 
 6 | 
 7 | class Douyin(Extractor):
 8 |     name = '抖音直播 (Douyin)'
 9 | 
10 |     quality_2_profile_id = {
11 |          'ORIGION': ['原画', 'OG'],
12 |         'FULL_HD1': ['蓝光', 'BD'],
13 |              'HD1': ['超清', 'TD'],
14 |              'SD1': ['高清', 'HD'],
15 |              'SD2': ['标清', 'SD']
16 |      }
17 | 
18 |     def prepare(self):
19 |         info = MediaInfo(self.name, True)
20 | 
21 |         if 'amemv.com' in self.url:
22 |             data = get_response('https://webcast.amemv.com/webcast/room/reflow/info/',
23 |                                 params={
24 |                                     'verifyFp': '',
25 |                                      'type_id': 0,
26 |                                      'live_id': 1,
27 |                                  'sec_user_id': '',
28 |                                       'app_id': 1128,
29 |                                      'msToken': '',
30 |                                      'X-Bogus': '',  # 1
31 |                                      'room_id': match1(self.url, '/reflow/(\d+)')
32 |                                 }).json()
33 |             video_info = data['data'].get('room')
34 |         else:
35 |             html = _byted.get_content(self.url)
36 |             data = matchall(html, 'self.__pace_f.push\(\[\d,("[a-z]:.+?")\]\)</script>')[-1]
37 |             data = json.loads(data)
38 |             self.logger.debug('data: \n%s', data)
39 |             data = json.loads(match1(data, '(\[.+\])'))[-1]
40 | 
41 |             try:
42 |                 video_info = data['state']['roomStore']['roomInfo'].get('room')
43 |             except KeyError:
44 |                 video_info = data['/webcast/reflow/:id'].get('room')
45 | 
46 |         assert video_info and video_info['status'] == 2, 'live is off!!!'
47 | 
48 |         title = video_info['title']
49 |         info.artist = nickName = video_info['owner']['nickname']
50 |         info.title = '{title} - {nickName}'.format(**vars())
51 | 
52 |         stream_info = video_info['stream_url']
53 |         stream_urls = []
54 |         if 'flv_pull_url' in stream_info:
55 |             for ql, url in stream_info['flv_pull_url'].items():
56 |                 stream_urls.append(['flv', ql, url])
57 |             orig = stream_info.get('rtmp_pull_url')
58 |             if orig and orig not in stream_info['flv_pull_url'].values():
59 |                 stream_urls.append(['flv', 'ORIGION', orig])
60 |         if 'hls_pull_url_map' in stream_info:
61 |             for ql, url in stream_info['hls_pull_url_map'].items():
62 |                 stream_urls.append(['m3u8', ql, url])
63 |             orig = stream_info.get('hls_pull_url')
64 |             if orig and orig not in stream_info['hls_pull_url_map'].values():
65 |                 stream_urls.append(['m3u8', 'ORIGION', orig])
66 | 
67 |         for ext, ql, url in stream_urls:
68 |             if not url:
69 |                 continue
70 |             stream_profile, stream_id = self.quality_2_profile_id[ql]
71 |             info.streams[stream_id + '-' + ext[:3]] = {
72 |                 'container': ext,
73 |                 'profile': stream_profile,
74 |                 'src' : [url],
75 |                 'size': Infinity
76 |             }
77 | 
78 |         return info
79 | 
80 | site = Douyin()
81 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyin/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .._byted import generate_mstoken, sign_xbogus
 5 | 
 6 | 
 7 | class Douyin(Extractor):
 8 |     name = '抖音 (Douyin)'
 9 | 
10 |     def prepare_mid(self):
11 |         return match1(self.url, r'\b(?:video/|music/|note/|vid=|aweme_id=|item_ids=)(\d+)')
12 | 
13 |     def prepare(self):
14 |         info = MediaInfo(self.name)
15 | 
16 |         ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
17 |         params = {
18 |             'aweme_id': self.mid,
19 |             'aid': 6383,
20 |             'version_name': '23.5.0',
21 |             'device_platform': 'webapp',
22 |             'os_version': 10
23 |         }
24 |         params['X-Bogus'] = sign_xbogus(urlencode(params), ua)
25 |         data = get_response('https://www.douyin.com/aweme/v1/web/aweme/detail/',
26 |                             params=params,
27 |                             headers={
28 |                                 'User-Agent': ua,
29 |                                 'Cookie': {'msToken': generate_mstoken()},
30 |                                 'Referer': 'https://www.douyin.com/'
31 |                             }).json()
32 |         assert data['status_code'] == 0, data['status_msg']
33 |         assert data['aweme_detail'], data['filter_detail']
34 | 
35 |         data = data['aweme_detail']
36 |         aweme_type = data['aweme_type']
37 |         # TikTok [0, 51, 55, 58, 61, 150]
38 |         if aweme_type not in [2, 68, 150, 0, 4, 51, 55, 58, 61]:
39 |             print('new type', aweme_type)
40 |         music_image = aweme_type in [2, 68, 150]  # video [0, 4, 51, 55, 58, 61]
41 |         title = data['desc']
42 |         nickName = data['author'].get('nickname', '')
43 |         uid = data['author'].get('unique_id') or \
44 |                 data['author']['short_id']
45 | 
46 |         info.title = '{title} - {nickName}(@{uid})'.format(**vars())
47 |         info.artist = nickName
48 |         info.duration = data['duration'] // 1000
49 | 
50 |         ext = 'mp4'
51 |         url = data['video']['play_addr']['url_list'][0] \
52 |                         .replace('playwm', 'play')
53 |         if music_image or 'music' in url:
54 |             ext = 'mp3'
55 |             url = data['video']['cover']['url_list'][0], url
56 |         info.streams['current'] = {
57 |             'container': ext,
58 |             'profile': data['video']['ratio'].upper(),
59 |             'src': [url]
60 |         }
61 |         return info
62 | 
63 | site = Douyin()
64 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyu/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'v.douyu' in url or 'vmobile.douyu' in url:
 6 |         from . import video as s
 7 |     else:
 8 |         from . import live as s
 9 | 
10 |     return s.site, url
11 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyu/live.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from .._common import *
  4 | from .util import get_h5enc, ub98484234
  5 | 
  6 | 
class Douyutv(Extractor):
    '''Extractor for Douyu live rooms.'''
    name = '斗鱼直播 (DouyuTV)'

    # Profile label reported by the API -> stream id.
    profile_2_id = {
        '原画':    'OG',
        '蓝光10M': 'BD10M',
        '蓝光8M':  'BD8M',
        '蓝光4M':  'BD4M',
        '蓝光':    'BD',
        '超清':    'TD',
        '高清':    'HD',
        '流畅':    'SD'
     }

    def prepare_mid(self):
        '''Scrape the numeric room id from the room page.

        Raises AssertionError when the page only carries the
        "房间已被关闭" (room closed) notice.
        '''
        html = get_content(self.url)
        # Patterns are listed in order of preference; the last one captures
        # the "room closed" notice itself so the assert below can detect it.
        mid = match1(html, '\$ROOM\.room_id\s*=\s*(\d+)',
                           'room_id\s*=\s*(\d+)',
                           '"room_id.?":(\d+)',
                           'data-onlineid=(\d+)',
                           '(房间已被关闭)')
        assert mid != '房间已被关闭', '房间已被关闭'
        return mid

    def prepare(self):
        '''Build the MediaInfo of the live room ``self.mid``.

        Title and artist come from the page, falling back to the open room
        API. One stream is registered per rate the play API serves; errors
        from that API are only written to the debug log.
        '''
        info = MediaInfo(self.name, True)

        add_header('Referer', 'https://www.douyu.com')
        html = get_content(self.url)

        title = match1(html, 'Title-head\w*">([^<]+)<')
        artist = match1(html, 'Title-anchorName\w*" title="([^"]+)"')
        if not title or not artist:
            # Page scraping failed: ask the open room API instead. When it
            # reports an error too, title/artist keep their scraped values.
            room_data = get_response(
                    'https://open.douyucdn.cn/api/RoomApi/room/' + self.mid
                    ).json()
            if room_data['error'] == 0:
                room_data = room_data['data']
                title = room_data['room_name']
                artist = room_data['owner_name']

        info.title = '{title} - {artist}'.format(**vars())
        info.artist = artist

        js_enc = get_h5enc(html, self.mid)
        params = {
            'cdn': '',
            'iar': 0,
            'ive': 0,
        }
        # Adds the 'v', 'did', 'tt' and 'sign' entries to params in place.
        ub98484234(js_enc, self.mid, self.logger, params)

        def get_live_info(rate=0):
            # Request the stream for one rate and register it in
            # info.streams. Returns an error message string, or None on
            # success. Only the initial call (rate == 0) recurses, once for
            # every other rate listed in 'multirates'.
            params['rate'] = rate
            live_data = get_response(
                        'https://www.douyu.com/lapi/live/getH5Play/' + self.mid,
                        data=params).json()
            if live_data['error']:
                return live_data['msg']

            live_data = live_data['data']
            real_url = '/'.join([live_data['rtmp_url'], live_data['rtmp_live']])
            rate_2_profile = {rate['rate']: rate['name']
                              for rate in live_data['multirates']}
            # Look up the label of the rate the server actually served.
            stream_profile = rate_2_profile[live_data['rate']]
            if '原画' in stream_profile:
                stream_id = 'OG'
            else:
                stream_id = self.profile_2_id[stream_profile]
            info.streams[stream_id] = {
                'container': match1(live_data['rtmp_live'], '\.(\w+)\?'),
                'profile': stream_profile,
                'src' : [real_url],
                'size': Infinity
            }

            error_msges = []
            if rate == 0:
                # Skip rate 0 and the rate just served, then fetch the rest.
                rate_2_profile.pop(0, None)
                rate_2_profile.pop(live_data['rate'], None)
                for rate in rate_2_profile:
                    error_msg = get_live_info(rate)
                    if error_msg:
                        error_msges.append(error_msg)
            if error_msges:
                return ', '.join(error_msges)

        error_msg = get_live_info()
        if error_msg:
            self.logger.debug('error_msg:\n\t' + error_msg)

        return info

    def prepare_list(self):
        '''Return the room ids found on the page at ``self.url``.'''
        html = get_content(self.url)
        return matchall(html, 'class="hroom_id" value="([^"]+)',
                              'data-room_id="([^"]+)')

site = Douyutv()
106 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyu/util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
# ub98484234() below runs douyu's JavaScript, so importing this module fails
# straight away when no JS engine is available.
assert JSEngine, "No JS Interpreter found, can't extract douyu live/video!"


# REF: https://cdnjs.com/libraries/crypto-js
# JS source appended to the JS context before douyu's own code.
# NOTE(review): presumably read from the packaged 'crypto-js-md5.min.js' with
# the URL as a fallback source - confirm against get_pkgdata_str.
js_md5 = get_pkgdata_str(__name__, 'crypto-js-md5.min.js',
        'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.9-1/crypto-js.min.js')
11 | 
def get_h5enc(html, rid):
    '''Return the JS source that defines ``ub98484234`` for room ``rid``.

    The script embedded in ``html`` is preferred; when it is missing or
    does not contain the function, the homeH5Enc API is queried instead
    (AssertionError with the API message on failure).
    '''
    embedded = match1(html, '(var vdwdae325w_64we =[\s\S]+?)\s*</script>')
    if embedded is not None and 'ub98484234(' in embedded:
        return embedded

    reply = get_response('https://www.douyu.com/swf_api/homeH5Enc',
                         params={'rids': rid}).json()
    assert reply['error'] == 0, reply['msg']
    return reply['data']['room' + rid]
20 | 
def ub98484234(js_enc, rid, logger, params):
    '''Run douyu's JS function ``ub98484234`` and store its output in ``params``.

    js_enc -- JS source defining ``ub98484234`` (see get_h5enc)
    rid    -- id passed as the function's first argument
    logger -- receives a debug dump of the decrypted code and the result
    params -- dict updated in place with the keys 'v', 'did', 'tt', 'sign'

    Returns None.
    '''
    # Random identifiers for the helper globals injected into the JS context.
    # NOTE(review): presumably randomized to avoid clashing with names used
    # by douyu's script - confirm.
    # 'workflow' is the name of the variable that ub98484234 passes to
    # eval(), or None when the pattern does not match.
    names_dict = {
        'debugMessages': get_random_name(8),
        'decryptedCodes': get_random_name(8),
        'patchCode': get_random_name(8),
        'resoult': get_random_name(8),
        '_ub98484234': get_random_name(8),
        'workflow': match1(js_enc, 'function ub98484234\(.+?\Weval\((\w+)\);'),
    }
    # Debug container plus empty window/document objects when the engine has
    # none. Doubled braces are literal braces escaped for str.format().
    js_dom = '''
    {debugMessages} = {{{decryptedCodes}: []}};
    if (!this.window) {{window = {{}};}}
    if (!this.document) {{document = {{}};}}
    '''.format(**names_dict)
    # [0]: JS helper that rewrites a decrypted code string before it is
    #      evaluated, recursing into nested eval() calls and recording each
    #      sub workflow in the debug container.
    # [1]: replacement for the original `eval(<workflow>);` statement: record
    #      the code, then evaluate its patched version.
    js_patch = ['''
    function {patchCode}(workflow) {{
        let testVari = /(\w+)=(\w+)\([\w\+]+\);.*?(\w+)="\w+";/.exec(workflow);
        if (testVari && testVari[1] == testVari[2]) {{
            workflow += `${{testVari[1]}}[${{testVari[3]}}] = function() {{return true;}};`;
        }}
        let subWorkflow = /(?:\w+=)?eval\((\w+)\)/.exec(workflow);
        if (subWorkflow) {{
            let subPatch = `
                {debugMessages}.{decryptedCodes}.push('sub workflow: ' + subWorkflow);
                subWorkflow = {patchCode}(subWorkflow);
            `.replace(/subWorkflow/g, subWorkflow[1]) + subWorkflow[0];
            workflow = workflow.replace(subWorkflow[0], subPatch);
        }}
        return workflow;
    }}
    '''.format(**names_dict), '''
    {debugMessages}.{decryptedCodes}.push({workflow});
    eval({patchCode}({workflow}));
    '''.format(**names_dict)]
    # Wrap ub98484234 so that it returns the debug container, which then
    # holds either the real return value or the exception message.
    js_debug = '''
    var {_ub98484234} = ub98484234;
    ub98484234 = function(p1, p2, p3) {{
        try {{
            let resoult = {_ub98484234}(p1, p2, p3);
            {debugMessages}.{resoult} = resoult;
        }} catch(e) {{
            {debugMessages}.{resoult} = e.message;
        }}
        return {debugMessages};
    }};
    '''.format(**names_dict)

    js_ctx = JSEngine()
    js_ctx.append(js_md5)
    js_ctx.append(js_dom)
    if names_dict['workflow']:
        # Patch the script only when its eval(<workflow>) call was located.
        js_ctx.append(js_patch[0])
        js_ctx.append(js_enc.replace('eval({workflow});'.format(**names_dict), js_patch[1]))
    else:
        js_ctx.append(js_enc)
    js_ctx.append(js_debug)

    # Fresh device id and current unix time, passed to the JS function and
    # echoed back in params.
    did = get_random_uuid_hex()
    tt = str(int(time.time()))
    ub98484234 = js_ctx.call('ub98484234', rid, did, tt)
    # Map the randomized keys back to readable ones for the debug log.
    ub98484234 = {
        'decryptedCodes': ub98484234[names_dict['decryptedCodes']],
        'resoult': ub98484234[names_dict['resoult']]
    }
    logger.debug('ub98484234: %s', ub98484234)
    ub98484234 = ub98484234['resoult']
    # The result string carries "v=<digits>" and "sign=<32 word chars>".
    params.update({
        'v': match1(ub98484234, 'v=(\d+)'),
        'did': did,
        'tt': tt,
        'sign': match1(ub98484234, 'sign=(\w{32})')
    })
93 | 


--------------------------------------------------------------------------------
/ykdl/extractors/douyu/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .util import get_h5enc, ub98484234
 5 | 
 6 | 
 7 | class DouyutvVideo(Extractor):
 8 |     name = '斗鱼视频 (DouyuTV)'
 9 | 
10 |     profile_2_id = {
11 |          'super': 'OG',  # Need Login
12 |           'high': 'TD',
13 |         'normal': 'HD'
14 |     }
15 | 
16 |     def prepare_mid(self):
17 |         return match1(self.url, 'show/(\w+)')
18 | 
19 |     def prepare(self):
20 |         info = MediaInfo(self.name)
21 | 
22 |         if self.url is None or 'vmobile' in self.url:
23 |             self.url = 'https://v.douyu.com/show/' + self.mid
24 | 
25 |         html = get_content(self.url)
26 |         info.title = match1(html, 'title>(.+?)-斗鱼视频<')
27 |         vid = match1(html, '"point_id":\s?(\d+)')
28 |         assert vid, "can't find video!!!"
29 | 
30 |         js_enc = get_h5enc(html, vid)
31 |         params = {'vid': self.mid}
32 |         ub98484234(js_enc, vid, self.logger, params)
33 | 
34 |         add_header('Referer', self.url)
35 |         data = get_response('https://v.douyu.com/api/stream/getStreamUrl',
36 |                             {'Cookie': 'dy_did=' + params['did']},
37 |                             data=params).json()
38 |         assert data['error'] == 0, data
39 | 
40 |         for stream_profile, st_date in data['data']['thumb_video'].items():
41 |             if not st_date:
42 |                 continue
43 |             stream_id = self.profile_2_id[stream_profile]
44 |             info.streams[stream_id] = {
45 |                 'container': 'm3u8',
46 |                 'profile': stream_profile,
47 |                 'src': [st_date['url']]
48 |             }
49 | 
50 |         return info
51 | 
52 | site = DouyutvVideo()
53 | 


--------------------------------------------------------------------------------
/ykdl/extractors/generalembed.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
# Regex tables used by GeneralEmbed.prepare_playlist() to spot embedded
# players in a page. The capture group(s) of each pattern hold the media
# id(s) handed to the matching site extractor.
'''
refer to http://open.youku.com/tools
'''
youku_embed_patterns = [
    'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
    'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
    'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
    'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
    'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'',
    'data-youku=\"[a-zA-Z0-9,:]+vid:([a-zA-Z0-9=]+)\"'
]

'''
v.qq.com
'''
qq_embed_patterns = [
    'v\.qq\.com[a-zA-Z0-9\/\?\.\;]+vid=([a-zA-Z0-9]+)',
    'TPout\.swf[a-zA-Z0-9=\?\&_]+vid=([a-zA-Z0-9]+)'
]


'''
tv.sohu.com
'''
# NOTE(review): the class '[&+=a-zA-z0-9]' in the second pattern uses the
# range 'A-z', which also matches '[', '\', ']', '^', '_' and '`'; 'A-Z' was
# probably intended - confirm before narrowing it.
sohu_embed_patterns = [
    'tv\.sohu\.com[a-zA-Z0-9\/\?=]+\&vid=([a-zA-Z0-9]+)\&',
    'share\.vrs\.sohu\.com\/my\/v.swf[&+=a-zA-z0-9]+&id=(\d+)',
    'my\.tv\.sohu\.com\/[a-zA-Z0-9\/]+/(\d+)'
]

'''
Ku6
'''
# Captures the embed page URL rather than an id; prepare_playlist() fetches
# that page to resolve the ku6 video id.
ku6_embed_url = [
    '(http://v.ku6vms.com/[^\"]+)'
]

ku6_embed_patterns = [
    'http://player.ku6.com/refer/(.*)/v.swf'
]
'''
163
'''
# Two capture groups per pattern: topicid and vid.
netease_embed_patterns = [
    'v\.163\.com\/[0-9a-zA-Z\/\?\.]+topicid=([^&]+)&amp\;vid=([^&]+)',
    'topicid=([a-zA-Z0-9]+)&amp;vid=([a-zA-Z0-9]+)&amp'
]

'''
iqiyi
'''
# Two capture groups: definitionID and tvId.
iqiyi_embed_patterns = [
    'definitionID=([^&]+)&tvId=([^&]+)'
]

'''
Letv Cloud
'''
# Two capture groups per pattern: uu and vu.
lecloud_embed_patterns = [
    '{"uu":"([^\"]+)","vu":"([^\"]+)"',
    'bcloud.swf\?uu=([^&]+)&amp;vu=([^&]+)',
    'uu=([^&]+)&amp;vu=([^&]+)'
]

'''
ifeng
'''
ifeng_embed_patterns = [
    'v\.ifeng\.com\/[a-zA-Z\=\/\?\&\.]+guid=([^\"&]+)'
]

'''
weibo
'''
# The captured id is appended to 'http://weibo.com/p/' by prepare_playlist().
weibo_embed_patterns = [
    'http://video.weibo.com/player/1034:(\w{32})\w*'
]

'''
Sina
'''
sina_embed_patterns = [
    'http://video.sina.com.cn/share/video/(\d+).swf'
]

'''
Bilibili
'''
bilibili_embed_patterns = [
    'flashvars="aid=(\d+)'
]
 97 | 
 98 | class GeneralEmbed(EmbedExtractor):
 99 |     name = 'GeneralEmbed (通用嵌入视频)'
100 | 
101 |     def prepare_playlist(self):
102 | 
103 |         def append_media_info(site, mid):
104 |             media_info = self.new_media_info({
105 |                 'site': site,
106 |                 'mid': mid
107 |             })
108 |             if media_info not in self.media_info_list:
109 |                 self.media_info_list.append(media_info)
110 | 
111 |         html = get_content(self.url)
112 | 
113 |         for mid in matchall(html, *youku_embed_patterns):
114 |             append_media_info('youku', mid)
115 | 
116 |         for mid in matchall(html, *qq_embed_patterns):
117 |             append_media_info('qq.video', mid)
118 | 
119 |         for mid in matchall(html, *sohu_embed_patterns):
120 |             append_media_info('sohu.my', mid)
121 | 
122 |         for url in matchall(html, *ku6_embed_url):
123 |             flashvars = matchall(get_content(url),'vid=([^&]+)',
124 |                                                   'style=([^&]+)',
125 |                                                   'sn=([^&]+)')
126 |             data = get_response(
127 |                     'http://v.ku6vms.com/phpvms/player/forplayer/vid/'
128 |                     '{}/style/{}/sn/{}'
129 |                     .format(*flashvars)).json()
130 |             mid = data['ku6vid']
131 |             append_media_info('ku6', mid)
132 | 
133 |         for mid in matchall(html, *ku6_embed_patterns):
134 |             append_media_info('ku6', mid)
135 | 
136 |         for mid in matchall(html, *netease_embed_patterns):
137 |             append_media_info('netease.video', mid)
138 | 
139 |         for mid in matchall(html, *iqiyi_embed_patterns):
140 |             append_media_info('iqiyi', mid)
141 | 
142 |         for mid in matchall(html, *lecloud_embed_patterns):
143 |             append_media_info('le.letvcloud', mid)
144 | 
145 |         for mid in matchall(html, *ifeng_embed_patterns):
146 |             append_media_info('ifeng.news', mid)
147 | 
148 |         for mid in matchall(html, *weibo_embed_patterns):
149 |             append_media_info('weibo', 'http://weibo.com/p/' + mid)
150 | 
151 |         for mid in matchall(html, *sina_embed_patterns):
152 |             append_media_info('sina.video', mid)
153 | 
154 |         for mid in matchall(html, *bilibili_embed_patterns):
155 |             append_media_info('bilibili.video', mid)
156 | 
157 |     parser = EmbedExtractor.parser_list
158 | 
159 | site = GeneralEmbed()
160 | 


--------------------------------------------------------------------------------
/ykdl/extractors/generalsimple.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | from .singlemultimedia import contentTypes
  5 | 
  6 | 
  7 | # TODO: subtitles support
  8 | # REF: https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API
  9 | 
# Verbose, case-insensitive regex for quoted media URLs found anywhere in a
# page. Group 1 is the URL: it starts with `http:`/`https:` or with a `/`
# (optionally preceded by a backslash), ends in one of the listed media
# extensions and may carry a trailing `/` and a query part. Group 2 is the
# extension itself. The URL must be opened by a quote and closed by a quote
# or `#`.
pattern_ext = r'''(?ix)
["'](
    (?:https?:|\\?/)[^"'#]+?\.
    (
        m3u8?                       | # HLS
        mpd                         | # DASH
        mp4|webm                    | # video/audio
        f4v|flv|ts                  | # video
        mov|qt|m4[pv]|og[mv]        | # video
        ogg|vid|3gp|mpe?g           | # video/audio
        mp3|flac|wave?|oga|aac|weba   # audio
    )
    /?(?:[\?&].+?)?
)["'#]
'''
# Verbose, case-insensitive regex for the `src` attribute of <video>,
# <audio> and <source> tags. Group 1 is the URL (same allowed prefixes as
# above); group 2 is the tag's `type` attribute value when it is a
# video/, audio/ or application/ MIME type, otherwise it does not take part
# in the match.
pattern_src = r'''(?ix)
<(?:video|audio|source)[^>]+?
src=["']?((?:https?:|\\?/)[^"' ]+)["' ]
[^>]*?
(?:
    type=["']((?:video|audio|application)/[^"']+)["']
    |
    [^>](?!type)*>
)
'''
 35 | 
 36 | class GeneralSimple(Extractor):
 37 |     name = 'GeneralSimple (通用简单)'
 38 | 
 39 |     def list_only(self):
 40 |         return True
 41 | 
 42 |     def prepare_list(self):
 43 |         html = get_content(self.url)
 44 |         title = match1(html, '<meta property="og:title" content="([^"]+)',
 45 |                              '''video_title\s*[:=]\s*['"](.+?)['"],''',
 46 |                              '<title>(.+?)</title>')
 47 |         _title, domain = matchm(title, '(.+) [-|*] (.+)$')
 48 |         if domain and domain.lower() in self.url.split('/')[2]:
 49 |             title = _title
 50 | 
 51 |         streams = get_kt_playlist(html)
 52 |         if streams:
 53 |             info = MediaInfo(self.name)
 54 |             info.title = title
 55 |             info.streams = streams
 56 |             info.extra.referer = self.url
 57 |             yield info
 58 |             return
 59 | 
 60 |         for i in range(2):
 61 |             urls = matchall(html, pattern_src)
 62 |             if urls:
 63 |                 urls = [(i and url or unescape(url), ctype, None) for url, ctype in urls]
 64 |                 break
 65 |             urls = matchall(html, pattern_ext)
 66 |             if urls:
 67 |                 urls = [(i and url or unescape(url), None, ext) for url, ext in urls]
 68 |                 break
 69 |             if i == 0:
 70 |                 html = unquote(unescape(html))
 71 | 
 72 |         urls = set(urls)
 73 |         self.set_index(0, len(urls))
 74 |         for i, (url, ctype, ext) in enumerate(urls):
 75 |             info = MediaInfo(self.name)
 76 |             info.title = len(urls) == 1 and title or f'{title}_{i+1}'
 77 |             url = literalize(url, True)
 78 |             url = match1(url, '.+(https?://.+)') or url  # redirect clear
 79 |             if url[:2] == '//':
 80 |                 url = self.url[:self.url.find('/')] + url
 81 |             elif url[0] == '/':
 82 |                 url = self.url[:self.url.find('/', 9)] + url
 83 |             if '?' not in url and '&' in url:
 84 |                 url = url.replace('&', '?', 1)
 85 |             if ext is None or ctype:
 86 |                 ctype = str(ctype).lower()
 87 |                 ext = contentTypes.get(ctype) or url_info(url)[1] or (
 88 |                             ctype.startswith('audio') and 'mp3' or 'mp4')
 89 |             if ext[:3] == 'm3u':
 90 |                 info.streams = load_m3u8_playlist(url, headers={'Referer': self.url})
 91 |             else:
 92 |                 info.streams['current'] = {
 93 |                     'container': ext,
 94 |                     'profile': 'current',
 95 |                     'src': [url],
 96 |                     'size': 0
 97 |                 }
 98 |             info.extra.referer = self.url
 99 |             yield info
100 | 
101 | site = GeneralSimple()
102 | 


--------------------------------------------------------------------------------
/ykdl/extractors/heibaizhibo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
# Refuse to load this extractor when no JS engine is available: the class
# below evaluates page data and decodes stream URLs through `JSEngine`.
assert JSEngine, "No JS Interpreter found, can't extract heibaizhibo!"

# Source text of the site's `m.js` helper, appended to the JS context in
# `Heibai.prepare`.
# NOTE(review): presumably read from the packaged 'heibaizhibo.m.js' with the
# URL as a fallback source — confirm against `get_pkgdata_str`.
js_m = get_pkgdata_str(__name__, 'heibaizhibo.m.js',
                       'https://pichb2.huoxinglaike.com/nuxt/static/m.js')
10 | 
class Heibai(Extractor):
    """Extractor for heibaizhibo live streams (match rooms and anchor rooms)."""
    name = '黑白直播'

    def prepare(self):
        """Build the live ``MediaInfo`` for ``self.url``.

        Room data comes from the live API when the URL carries a numeric id
        after ``/live/``, otherwise from the ``window.__NUXT__`` blob embedded
        in the page. Failures are reported through ``AssertionError``.
        """
        info = MediaInfo(self.name, True)

        js_ctx = JSEngine('if (!this.window) {window = {};}')
        js_ctx.append(js_m)

        vid = match1(self.url, r'/live/.*?(\d+)')
        if vid is None:
            html = get_content(self.url)
            js_data = match1(html, 'window.__NUXT__=(.+?)</script>')
            # Fail with a readable message instead of evaluating `None`.
            assert js_data, "can't find the room data in page!"
            data = js_ctx.eval(js_data)
            self.logger.debug('data:\n%s', data)
            data = data['data'][0]
            data = data.get('videoInfo', data)
        else:
            data = get_response('https://www.heibaizhibo.com/api/index/live',
                                params={'id': vid}).json()
            msg = data['message']
            assert '成功' in msg, msg
            data = data['data']['detail']

        try:
            qllist = data['hd']
        except KeyError:
            # anchor room: quality list and titles live under other keys
            qllist = data['hdlist']
            title = data['anchorInfo']['title']
            artist = data['anchorInfo']['nickname']
        else:
            title = '[{}] {}({})'.format(
                    data['eventName'], data['homeName'], data['awayName'])
            assert data['playCode'], 'live video is offline!'
            # From here on `data` is the first play source of the match.
            data = data['playCode'][0]
            artist = data['gtvDesc'] or data['name']

        info.title = f'{title} - {artist}'
        info.artist = artist
        params = {
            'gtvId': data.get('gtvId'),
            'id': data.get('id', 0),
            'type': 3,
            'source': 2,
            'liveType': 3,  # 1: rtmp, 2: m3u8, 3: flv
        }
        if not params['gtvId']:
            del params['gtvId']

        for ql in qllist:
            params['defi'] = ql['defi']
            data_live = get_response(
                'https://sig.heibaizhibo.com/signal-front/live/matchLiveInfo?',
                params=params).json()
            msg = data_live['msg']
            assert '成功' in msg, msg
            data_live = data_live['data'][0]
            assert data_live['score'] >= 0, 'live video is offline!'
            # The returned URL is encoded; `vp` from m.js decodes it.
            url = js_ctx.call('vp', data_live['liveUrl'])
            stream_id = ql['defi'].upper()
            info.streams[stream_id] = {
                'container': 'flv',
                'profile': ql['name'],
                'src' : [url],
                'size': Infinity
            }
            break  # seems the same quality?

        return info

site = Heibai()
83 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huajiao/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'com/v' in url:
 6 |         from . import video as s
 7 |     else:
 8 |         from . import live as s
 9 | 
10 |     return s.site, url
11 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huajiao/live.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class Huajiao(Extractor):
 7 |     name = 'huajiao (花椒直播)'
 8 | 
 9 |     def prepare_mid(self):
10 |         html = get_content(self.url)
11 |         return match1(html, '"sn":"([^"]+)')
12 | 
13 |     def prepare(self):
14 |         info = MediaInfo(self.name, True)
15 |         html = get_content(self.url)
16 |         t_a = match1(html, '"keywords" content="([^"]+)')
17 |         info.title = t_a.split(',')[0]
18 |         info.artist = t_a.split(',')[1]
19 | 
20 |         replay_url = match1(html, '"m3u8":\s?("[^"]+)"')
21 |         if replay_url and len(replay_url) > 2:
22 |             replay_url = json.loads(replay_url)
23 |             info.live = False
24 |             info.streams = load_m3u8_playlist(replay_url)
25 |             return info
26 | 
27 |         channel = match1(html, '"channel":"([^"]+)')
28 |         encoded_json = get_response('http://g2.live.360.cn/liveplay',
29 |                                     params={
30 |                                         'stype': 'flv',
31 |                                         'channel': channel,
32 |                                         'bid': 'huajiao',
33 |                                         'sn': self.mid,
34 |                                         'sid': get_random_uuid_hex('SID'),
35 |                                         '_rate': 'xd',
36 |                                         'ts': time.time(),
37 |                                         'r': random.random(),
38 |                                         '_ostype': 'flash',
39 |                                         '_delay': 0,
40 |                                         '_sign': 'null',
41 |                                         '_ver': 13
42 |                                     }).content
43 |         decoded_json = unb64(encoded_json[0:3] + encoded_json[6:])
44 |         self.logger.debug('decoded_json:\n%s', decoded_json)
45 |         data = json.loads(decoded_json)
46 |         info.live = True
47 |         info.streams['current'] = {
48 |             'container': 'flv',
49 |             'profile': 'current',
50 |             'src' : [data['main']],
51 |             'size': Infinity
52 |         }
53 |         return info
54 | 
55 | site = Huajiao()
56 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huajiao/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class HuajiaoVideo(Extractor):
 7 |     name = 'huajiao video (花椒小视频)'
 8 | 
 9 |     def get_data(self, type):
10 |         html = get_content(self.url)
11 |         data = match1(html, '_DATA.{type} = (.+?[}}\]]);'.format(**vars()))
12 |         self.logger.debug('%s data:\n%s', type, data)
13 |         return json.loads(data)
14 | 
15 |     def generate_info(self, data):
16 |         info = MediaInfo(self.name)
17 |         info.artist = data['user_name']
18 |         info.title = data['video_name']
19 |         info.duration = data.get('duration')
20 |         info.streams['current'] = {
21 |             'container': 'mp4',
22 |             'profile': 'current',
23 |             'src': [data['video_url']]
24 |         }
25 |         return info
26 | 
27 |     def prepare_feed(self):
28 |         data = self.get_data('feed')
29 |         feed = data['feed']
30 |         feed['user_name'] = data['author']['nickname']
31 |         return feed
32 | 
33 |     def prepare(self):
34 |         return self.generate_info(self.prepare_feed())
35 | 
36 |     def prepare_list(self):
37 |         info = self.prepare_feed()
38 |         infos = self.get_data('list')
39 | 
40 |         vid = info['vid']
41 |         vids = [i['vid'] for i in infos]
42 |         if vid not in vids:
43 |             vids.insert(0, vid)
44 |             infos.insert(0, info)
45 |         self.set_index(vid, vids)
46 | 
47 |         for info in infos:
48 |             yield self.generate_info(info)
49 | 
50 | site = HuajiaoVideo()
51 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huya/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'v.huya' in url:
 6 |         from . import video as s
 7 |     else:
 8 |         from . import live as s
 9 | 
10 |     return s.site, url
11 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huya/live.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from .._common import *
  4 | 
  5 | 
  6 | class HuyaLive(Extractor):
  7 |     name = 'Huya Live (虎牙直播)'
  8 | 
  9 |     def profile_2_id(self, profile):
 10 |         id = match1(profile, '(\d+K)')
 11 |         br = match1(profile, '(\d+M)')
 12 |         if id is None:
 13 |             if profile.startswith(('蓝光', 'HDR')):
 14 |                 id = 'BD'
 15 |             else:
 16 |                 id = {
 17 |                     '超清': 'TD',
 18 |                     '高清': 'HD',
 19 |                     '流畅': 'SD'
 20 |                 }[profile]
 21 |         if br:
 22 |             id += br
 23 |         if 'HDR' in profile:
 24 |             id += '-HDR'
 25 |         return id
 26 | 
 27 |     def prepare(self):
 28 |         info = MediaInfo(self.name, True)
 29 | 
 30 |         html  = get_content(self.url)
 31 | 
 32 |         data = match1(html, 'stream: ({.+)\n.*?};')
 33 |         assert data, "can't found video!!"
 34 |         self.logger.debug('data:\n%s', data)
 35 |         data = json.loads(data)
 36 |         assert data['vMultiStreamInfo'], 'live video is offline'
 37 | 
 38 |         room_info = data['data'][0]['gameLiveInfo']
 39 |         info.title = '{}「{} - {}」'.format(
 40 |             room_info['roomName'], room_info['nick'], room_info['introduction'])
 41 |         info.artist = room_info['nick']
 42 |         liveSourceType = room_info['liveSourceType']
 43 | 
 44 |         stream_info_list = data['data'][0]['gameStreamInfoList']
 45 |         random.shuffle(stream_info_list)
 46 |         random.shuffle(stream_info_list)
 47 |         while stream_info_list:
 48 |             stream_info = stream_info_list.pop()
 49 |             sUrl = stream_info['sFlvUrl']
 50 |             if sUrl:
 51 |                 info.add_comment(stream_info['sCdnType'])
 52 |                 break
 53 |         sStreamName = stream_info['sStreamName']
 54 |         sUrlSuffix = stream_info['sFlvUrlSuffix']
 55 |         _url = '{sUrl}/{sStreamName}.{sUrlSuffix}?'.format(**vars())
 56 | 
 57 |         params = dict(parse_qsl(unescape(stream_info['sFlvAntiCode'])))
 58 |         params.setdefault('t', '100')  # 102
 59 |         ct = int((int(params['wsTime'], 16) + random.random()) * 1000)
 60 |         lPresenterUid = stream_info['lPresenterUid']
 61 |         if liveSourceType and not sStreamName.startswith(str(lPresenterUid)):
 62 |             uid = int(lPresenterUid)
 63 |         else:
 64 |             uid = int(ct % 1e10 * 1e3 % 0xffffffff)
 65 |         u1 = uid & 0xffffffff00000000
 66 |         u2 = uid & 0xffffffff
 67 |         u3 = uid & 0xffffff
 68 |         u = u1 | u2 >> 24 | u3 << 8
 69 |         params.update({
 70 |             'ctype': 'huya_live', # !!!!
 71 |             'u': str(u),
 72 |             'seqid': str(ct + uid),
 73 |             'ver': '1',
 74 |         })
 75 |         fm = unb64(params['fm']).split('_', 1)[0]
 76 |         ss = hash.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
 77 | 
 78 |         for si in data['vMultiStreamInfo']:
 79 |             stream_profile = si['sDisplayName']
 80 |             stream_id = self.profile_2_id(stream_profile)
 81 |             rate = si['iBitRate']
 82 |             if rate:
 83 |                 params['ratio'] = rate
 84 |             else:
 85 |                 params.pop('ratio', None)
 86 |             params['wsSecret'] = hash.md5('_'.join(
 87 |                         [fm, params['u'], sStreamName, ss, params['wsTime']]))
 88 |             url = _url + urlencode(params, safe=',*')
 89 |             info.streams[stream_id] = {
 90 |                 'container': 'flv',
 91 |                 'profile': stream_profile,
 92 |                 'src': [url],
 93 |                 'size': Infinity
 94 |             }
 95 |         fake_headers.update({
 96 |             'Accept': '*/*',
 97 |             'Origin': 'https://www.huya.com',
 98 |             'Referer': 'https://www.huya.com/',
 99 |             'Sec-Fetch-Dest': 'empty',
100 |             'Sec-Fetch-Mode': 'cors',
101 |             'Sec-Fetch-Site': 'same-site',
102 |         })
103 |         info.extra['header'] = fake_headers
104 |         return info
105 | 
106 | site = HuyaLive()
107 | 


--------------------------------------------------------------------------------
/ykdl/extractors/huya/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class HuyaVideo(Extractor):
 7 |     name = 'huya video (虎牙视频)'
 8 | 
 9 |     quality_2_id_profile = {
10 |         'yuanhua': ['BD', '原画'],
11 |            '1300': ['TD', '超清'],
12 |           #'TODO': ['HD', '高清'],
13 |             '350': ['SD', '流畅']
14 |     }
15 | 
16 |     def prepare_mid(self):
17 |         mid = match1(self.url, 'play/(\d+)')
18 |         if mid is None:
19 |             html = get_content(self.url)
20 |             mid = match1(html, 'vid = (\d+)', 'data-vid="(\d+)')
21 |         return mid
22 | 
23 |     def prepare(self):
24 |         info = MediaInfo(self.name)
25 | 
26 |         html = get_content(self.url)
27 |         info.title = match1(html, '<h1 class="video-title">(.+?)</h1>')
28 |         info.artist = match1(html, '<div class="video-author">[\s\S]+?<h3>(.+?)</h3>')
29 | 
30 |         t1 = int(time.time() * 1000)
31 |         t2 = t1 + random.randrange(5, 10)
32 |         rnd = str(random.random()).replace('.', '')
33 |         data = get_response('https://v-api-player-ssl.huya.com/',
34 |                         params={
35 |                             'callback': 'jQuery1124{rnd}_{t1}'.format(**vars()),
36 |                             'r': 'vhuyaplay/video',
37 |                             'vid': self.mid,
38 |                             'format': 'mp4,m3u8',
39 |                             '_': t2
40 |                         }).json()
41 |         assert data['code'] == 1, data['message']
42 |         data = data['result']['items']
43 | 
44 |         for stream_date in data:
45 |             ext = stream_date['format']
46 |             quality =stream_date['definition']
47 |             stream_id, stream_profile = self.quality_2_id_profile[quality]
48 |             stream_id += '-' + ext
49 |             url = stream_date['transcode']['urls'][0]
50 |             info.streams[stream_id] = {
51 |                 'container': ext,
52 |                 'profile': stream_profile,
53 |                 'src' : [url],
54 |                 'size': int(stream_date['size'])
55 |             }
56 | 
57 |         return info
58 | 
59 | site = HuyaVideo()
60 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ifeng/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'video' in url:
 6 |         from . import video as s
 7 |     elif 'gongkaike' in url:
 8 |         from . import gongkaike as s
 9 |     else:
10 |         from . import news as s
11 | 
12 |     return s.site, url
13 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ifeng/gongkaike.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .news import Ifeng
 5 | 
 6 | 
 7 | class IfengOpenC(Ifeng):
 8 |     name = '凤凰公开课 (ifeng open course)'  # 404
 9 | 
10 |     def prepare(self):
11 |         info = MediaInfo(self.name)
12 | 
13 |         xml = get_content(
14 |                 'http://vxml.ifengimg.com/video_info_new/{}/{}/{}.xml'
15 |                 .format(self.mid[-2], self.mid[-2:], self.mid))
16 | 
17 |         info.title = match1(xml, 'SE_Title="([^"]+)')
18 |         urls = matchall(xml, 'playurl="([^"]+)')
19 |         urls = ['http://ips.ifeng.com/' + u[7:] for u in urls ]
20 |         info.streams['current'] = {
21 |             'container': 'mp4',
22 |             'profile': 'current',
23 |             'src': urls
24 |         }
25 | 
26 |         return info
27 | 
28 | site = IfengOpenC()
29 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ifeng/news.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class Ifeng(Extractor):
 7 |     name = '凤凰新闻 (ifeng news)'  # EXPIRED
 8 | 
 9 |     types_2_id_profile = {
10 |           '1M': ['TD', '超清'],
11 |         '500k': ['HD', '高清'],
12 |         '350k': ['SD', '标清']
13 |     }
14 | 
15 |     def prepare_mid(self):
16 |         mid = match1(self.url, '#([a-zA-Z0-9\-]+)',
17 |                                '/([a-zA-Z0-9\-]+).shtml')
18 |         if mid is None:
19 |             html = get_content(self.url)
20 |             mid = match1(html, r'\bvid"?: "([^"]+)')
21 |         return mid
22 | 
23 |     def prepare(self):
24 |         info = MediaInfo(self.name)
25 | 
26 |         doc = get_response(
27 |                 'http://vxml.ifengimg.com/video_info_new/{}/{}/{}.xml'
28 |                 .format(self.mid[-2], self.mid[-2:], self.mid)).xml()
29 |         info.title = doc.getElementsByTagName('item')[0].getAttribute('Name')
30 |         videos = doc.getElementsByTagName('videos')
31 |         for v in videos[0].getElementsByTagName('video'):
32 |             ext = v.getAttribute('mediaType')
33 |             _t = v.getAttribute('type')
34 |             _u = v.getAttribute('VideoPlayUrl')
35 |             stream_id, stream_profile = self.types_2_id_profile[_t]
36 |             info.streams[stream_id] = {
37 |                 'container': ext,
38 |                 'profile': stream_profile,
39 |                 'src': [_u]
40 |                 }
41 | 
42 |         return info
43 | 
44 | site = Ifeng()
45 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ifeng/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class IfengVideo(Extractor):
 7 |     name = '凤凰视频 (ifeng video)'  # Expired
 8 | 
 9 |     def prepare(self):
10 |         return self.url[-13: -6]
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 | 
15 |         info.title = self.name + '-' + self.mid
16 |         data = get_response(
17 |                 'http://tv.ifeng.com/html5/{self.mid}/video.json'
18 |                 .format(**vars())).json()
19 |         if 'bqSrc' in data:
20 |             info.streams['SD'] = {
21 |                 'container': 'mp4',
22 |                 'profile': '标清',
23 |                 'src': [data['bqSrc']]
24 |             }
25 |         if 'gqSrc' in data:
26 |             info.streams['HD'] = {
27 |                 'container': 'mp4',
28 |                 'profile': '高清',
29 |                 'src': [data['gqSrc']]
30 |             }
31 |         return info
32 | 
33 | site = IfengVideo()
34 | 


--------------------------------------------------------------------------------
/ykdl/extractors/iqilu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Iqilu(SimpleExtractor):
 7 |     name = '齐鲁网 (iqilu)'
 8 | 
 9 |     def init(self):
10 |         self.title_pattern = '<meta name="description" content="(.+?)"\W'
11 |         self.artist_pattern = '<title>.+?_([^_]+?频道)_山东网络台_齐鲁网</title>'
12 |         self.url_pattern = '"mp4-wrapper"[^"]+"(http[^"]+)"'
13 | 
14 |     def l_assert(self):
15 |         assert match(self.url, 'https?://v\.iqilu\.com/\w+')
16 | 
17 |     def reprocess(self):
18 |         self.info.title = '[{self.info.artist}] {self.info.title}'.format(**vars())
19 | 
20 | site = Iqilu()
21 | 


--------------------------------------------------------------------------------
/ykdl/extractors/iqiyi/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import get_location
 4 | 
 5 | import re
 6 | 
 7 | def get_extractor(url):
 8 |     if 'gamelive' in url:
 9 |         url = get_location(url)
10 |     if 'pps.' in url:
11 |         from .. import pps as s
12 |     elif 'live.iqiyi' in url:
13 |         from . import live as s
14 |     else:
15 |         from . import video as s
16 | 
17 |     return s.site, url
18 | 


--------------------------------------------------------------------------------
/ykdl/extractors/iqiyi/live.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .util import cmd5x_iqiyi3 as cmd5x
 5 | 
 6 | # May be closed, see x.pps.tv
 7 | 
 8 | 
 9 | def getlive(vid):
10 |     tm = time.time()
11 |     host = 'https://live.video.iqiyi.com'
12 |     dfp = get_random_id(66)
13 |     params = {
14 |         'lp': vid,
15 |         'src': '01010031010000000000',
16 |         'uid': '',
17 |         'rateVers': 'PC_QIYI_3',
18 |         'k_uid': get_random_id(24, 'k_uid'),
19 |         'qdx': 'n',
20 |         'qdv': 3,
21 |         'qd_v': 1,
22 |         'dfp': dfp,
23 |         'v': 1,
24 |         'k_err_retries': 0,
25 |         'tm': int(tm + 1),
26 |     }
27 |     src = '/live?' + urlencode(params)
28 |     vf = cmd5x(src)
29 |     req_url = '{host}{src}&vf={vf}'.format(**vars())
30 |     st = int(tm * 1000)
31 |     et = int((tm + 1296000) * 1000)
32 |     c_dfp = '__dfp={dfp}@{et}@{st}'.format(**vars())
33 |     return get_response(req_url, {'Cookie': c_dfp}).json()
34 | 
class IqiyiLive(Extractor):
    """Extractor for iqiyi live programmes."""
    name = '爱奇艺直播 (IqiyiLive)'

    # API stream type -> stream id
    type_2_id = {
        #'': '4K',
        'RESOLUTION_1080P': 'BD',
        'RESOLUTION_720P': 'TD',
        'HIGH_DEFINITION': 'HD',
        'SMOOTH': 'SD',
        #'': 'LD'
    }

    def prepare_mid(self):
        """Return the ``qpId`` scraped from the page."""
        html = get_content(self.url)
        return match1(html, r'"qpId":(\d+)')

    def prepare(self):
        """Build the live ``MediaInfo``.

        HLFLV streams always set the entry for their quality; TS (m3u8)
        streams only fill a quality that has no entry yet. Streams of an
        unknown type or format are skipped. Raises ``AssertionError`` when
        the API refuses the request or the live is off.
        """
        info = MediaInfo(self.name, True)

        data = getlive(self.mid)
        assert data['code'] == 'A00000', data.get('msg', "can't play this live video!")
        data = data['data']
        assert data['liveType'] != 2, 'this live is off!'
        info.title = data['name']

        for stream in data['streams']:
            stream_type = stream['steamType']  # sic: the API key is 'steamType'
            stream_id = self.type_2_id.get(stream_type)
            if stream_id is None:
                # Types without a mapping (e.g. the commented-out 4K/LD
                # slots) used to raise KeyError.
                self.logger.debug('skip unknown stream type: %s', stream_type)
                continue

            if stream['formatType'] == 'HLFLV':
                stream_params = stream['url'].split('?')[-1]
                stream_params_dict = dict(parse_qsl(stream_params))
                if stream_params_dict['hl_sttp'] != 'flv':
                    continue
                params = {
                    'streamName': stream['streamName'],
                    'streamParams': stream_params,
                    'hl_stid': stream_params_dict['hl_stid'],
                    'hl_stft': stream_params_dict['hl_stft'],
                    'hl_stapp': stream_params_dict['hl_stapp']
                }
                # The dispatcher answers with the real URL under key 'l';
                # the stream name in it is then swapped for `hl_stid`.
                url = get_response('https://flvlive.video.iqiyi.com/{hl_stapp}/'
                                   '{streamName}.{hl_stft}?{streamParams}'
                                   .format(**params)).json()['l']
                url = url.replace('{streamName}.'.format(**params),
                                  '{hl_stid}.'.format(**params))
                ext = 'flv'
            elif stream_id in info.streams:
                continue
            elif stream['formatType'] == 'TS':
                url = stream['url']
                ext = 'm3u8'
            else:
                # Any other format used to fall through with `url`/`ext`
                # unbound, or left over from the previous stream.
                continue

            stream_profile = stream['screenSize']
            info.streams[stream_id] = {
                'container': ext,
                'profile': stream_profile,
                'src' : [url],
                'size': Infinity
            }

        return info

site = IqiyiLive()
99 | 


--------------------------------------------------------------------------------
/ykdl/extractors/iqiyi/util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | js_ctx = None
 7 | 
 8 | def init_jsengine():
 9 |     global js_ctx
10 |     if js_ctx is None:
11 |         assert JSEngine, "No JS Interpreter found, can't use cmd5x!"
12 |         js_ctx = JSEngine(init_global=True)
13 | 
14 |         # REF: https://zsaim.github.io/2019/08/23/Iqiyi-cmd5x-Analysis/
15 |         #      https://raw.githubusercontent.com/ZSAIm/ZSAIm.github.io/master/misc/2019-08-23/iqiyi_cmd5x.js
16 |         js = get_pkgdata_str(__name__, 'cmd5x.js',
17 |                     'https://raw.githubusercontent.com/zhangn1985/ykdl/master/ykdl/extractors/iqiyi/cmd5x.js')
18 |         js_ctx.append(js)
19 | 
20 |         # REF: https://github.com/lldy/js
21 |         js = get_pkgdata_str(__name__, 'cmd5x_iqiyi3.js',
22 |                     'https://raw.githubusercontent.com/zhangn1985/ykdl/master/ykdl/extractors/iqiyi/cmd5x_iqiyi3.js')
23 |         js_ctx.append(js)
24 | 
def md5(s):
    """Return ``hash.md5(s)``.

    NOTE(review): ``hash`` here must be a helper object brought in by the
    star import (the builtin ``hash`` has no ``md5`` attribute) -- confirm
    against ``_common``.
    """
    digest = hash.md5(s)
    return digest
27 | 
def md5x(s):
    """Return ``md5`` of *s* with a fixed 32-character suffix appended."""
    # The suffix is a constant.  It was originally derived as:
    #   for j in range(8):
    #       for k in range(4):
    #           v4 = 13 * (66 * k + 27 * j) % 35
    #           suffix += chr(v4 + 88 if v4 >= 10 else v4 + 49)
    suffix = '1j2k2k3l3l4m4m5n5n6o6o7p7p8q8q9r'
    salted = s + suffix
    return md5(salted)
39 | 
def cmd5x(s):
    """Return the result of the JS function ``cmd5x_exports.cmd5x`` for *s*.

    The shared JS context is initialised on demand via ``init_jsengine()``.
    """
    # Historical note: an earlier pure-Python version returned '0' when
    # len(s) < 6 and otherwise md5(s + 'h2l6suw16pbtikmotf0j79cej4n8uw13').
    # That salt applied to these values of the param src:
    #    01010031010000000000
    #    01010031010010000000
    #    01080031010000000000
    #    01080031010010000000
    #    03020031010000000000
    #    03020031010010000000
    #    03030031010000000000
    #    03030031010010000000
    #    02020031010000000000
    #    02020031010010000000
    # It is out of date, hence the JS implementation below.
    init_jsengine()
    signature = js_ctx.call('cmd5x_exports.cmd5x', s)
    return signature
59 | 
def cmd5x_iqiyi3(s):
    """Return the result of the JS function ``cmd5x`` for *s* (used for live)."""
    init_jsengine()
    signature = js_ctx.call('cmd5x', s)
    return signature
64 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ixigua.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | from . import _byted
 5 | 
 6 | 
 7 | class IXiGua(Extractor):
 8 |     name = '西瓜视频 (IXiGua)'
 9 | 
10 |     @staticmethod
11 |     def profile_2_id(profile):
12 |         if profile[-1] == 'p':
13 |             return {
14 |                '1080p': 'BD',
15 |                 '720p': 'TD',
16 |                 '480p': 'HD',
17 |                 '360p': 'SD',
18 |              }[profile]
19 |         if profile[-1] == 'k':
20 |             return profile.upper()
21 |         assert 0, 'unsupported profile: %r' % profile
22 | 
23 |     def prepare(self):
24 |         info = MediaInfo(self.name)
25 | 
26 |         html = _byted.get_content(self.url)
27 |         data = match1(html, 'window._SSR_HYDRATED_DATA=(.+?)</script>')
28 |         self.logger.debug('data: \n%s', data)
29 |         data = json.loads(data.replace('undefined', 'null'))
30 | 
31 |         video_info = data['anyVideo']['gidInformation']['packerData']
32 | 
33 |         if 'video' in video_info:
34 |             video_info = video_info['video']
35 |             info.title = video_info['title']
36 |             info.artist = video_info['user_info']['name']
37 |         else:
38 |             albumInfo = video_info['albumInfo']
39 |             al_title = albumInfo['title']
40 |             info.artist = albumInfo['userInfo']['name']
41 |             for c in (*albumInfo['areaList'],
42 |                        albumInfo['year'],
43 |                       *albumInfo['tagList'],
44 |                       *[a['name'] for a in albumInfo.get('actorList', [])]):
45 |                 info.add_comment(c)
46 |             ep_title = video_info['episodeInfo']['title']
47 |             if al_title in ep_title:
48 |                 info.title = ep_title
49 |             else:
50 |                 info.title = '{al_title} - {ep_title}'.format(**vars())
51 | 
52 |         videoResource = video_info['videoResource']['normal']
53 |         info.duration = videoResource['video_duration']
54 | 
55 |         for v in videoResource['video_list'].values():
56 |             stream_profile = v['definition']
57 |             stream_id = self.profile_2_id(stream_profile)
58 |             info.streams[stream_id] = {
59 |                 'container': v['vtype'],
60 |                 'profile': stream_profile,
61 |                 'src' : [unb64(v['backup_url_1'])],  # main_url status 403
62 |                 'size': v['size']
63 |             }
64 | 
65 |         return info
66 | 
67 |     def prepare_list(self):
68 |         albumId, episodeId = matchall(self.url, '.ixigua.com/(\d+)(?:.+?id=(\d+))?')[0]
69 |         data = get_response('https://www.ixigua.com/api/albumv2/details',
70 |                             headers={'Referer': 'https://www.ixigua.com/'},
71 |                             params={'albumId': albumId}).json()
72 |         assert data['code'] == 200, "can't fetch playlist!"
73 | 
74 |         ep_ids = [b for a, b in sorted((ep['seq'], ep['episodeId'])
75 |                                        for ep in data['data']['playlist'])]
76 |         self.set_index(episodeId, ep_ids)
77 |         for ep_id in ep_ids:
78 |             yield 'https://www.ixigua.com/{albumId}?id={ep_id}'.format(**vars())
79 | 
80 | site = IXiGua()
81 | 


--------------------------------------------------------------------------------
/ykdl/extractors/joy.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Joy(Extractor):
 7 | 
 8 |     name = '激动网 (Joy)'
 9 | 
10 |     def prepare_mid(self):
11 |         return match1(self.url, 'resourceId=([0-9]+)')
12 | 
13 |     def prepare(self):
14 |         info = MediaInfo(self.name)
15 | 
16 |         data= get_response('https://api.joy.cn/v1/video',
17 |                            params={'id': self.mid}).json()
18 |         assert data['code'] > 0, data['message']
19 |         data = data['data']
20 | 
21 |         info.title = data['title']
22 |         url = data['res_url']
23 |         _, ext, _ = url_info(url)
24 | 
25 |         info.streams['current'] = {
26 |             'container': ext,
27 |             'profile': 'current',
28 |             'src': [url]
29 |         }
30 |         return info
31 | 
32 | site = Joy()
33 | 


--------------------------------------------------------------------------------
/ykdl/extractors/kankanews.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | # TODO: Live & TV
 7 | 
 8 | class KankanNews(Extractor):
 9 |     name = '看看新闻 (kankannews)'
10 | 
11 |     def prepare(self):
12 |         info = MediaInfo(self.name)
13 | 
14 |         html = get_content(self.url)
15 |         vid = match1(html, 'omsid="(\d+)"')
16 |         assert vid, 'No omsid has been found!!'
17 | 
18 |         info.artist = match1(html, 'keyboard:"(.+?)"')
19 |         info.title = info.artist + \
20 |                      match1(html, '<title>视频(.+?)_[^_]+_看看新闻</title>')
21 | 
22 |         params = [
23 |             ('nonce', get_random_str(8).lower()),
24 |             ('omsid', vid),
25 |             ('platform', 'pc'),
26 |             ('timestamp', int(time.time())),
27 |             ('version', '1.0')
28 |         ]
29 |         sign = hash.md5(hash.md5(urlencode(params) +
30 |                                  '&28c8edde3d61a0411511d3b1866f0636'))
31 |         params.append(('sign', sign))
32 |         data = get_response('https://api-app.kankanews.com/kankan/pc/getvideo',
33 |                             params=params).json()
34 |         assert data['code'] == '10000', data['error']['message']
35 |         data = data['result']['video']
36 | 
37 |         info.streams['current'] = {
38 |             'container': 'mp4',
39 |             'profile': 'current',
40 |             'src' : [data['videourl']],
41 |             'size': int(data['filesize'])
42 |         }
43 |         return info
44 | 
45 | site = KankanNews()
46 | 


--------------------------------------------------------------------------------
/ykdl/extractors/ku6.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Ku6(SimpleExtractor):
 7 |     name = '酷6 (Ku6)'
 8 | 
 9 |     def init(self):
10 |         self.url_pattern = 'flvURL: "([^"]+)'
11 |         self.title_pattern = 'title = "([^"]+)'
12 |         pass
13 | 
14 |     def list_only(self):
15 |         return match(self.url, 'https://www.ku6.com/detail/\d+')
16 | 
17 |     def prepare_list(self):
18 |         html = get_content(self.url)
19 |         videos = matchall(html, "'title': '(.+?)',[\s\S]+?'playUrl': '(.+?)',")
20 |         videos.reverse()
21 |         self.set_index(None, videos)
22 |         for title, url in videos:
23 |             info = MediaInfo(self.name)
24 |             info.title = title
25 |             info.streams['current'] = {
26 |                 'container': 'mp4',
27 |                 'src': [url]
28 |             }
29 |             yield info
30 | 
31 | site = Ku6()
32 | 


--------------------------------------------------------------------------------
/ykdl/extractors/kuwo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Kuwo(Extractor):
 7 |     name = 'KuWo (酷我音乐)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(self.url, '/play_detail/(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 |         install_cookie()
15 | 
16 |         if not self.is_list:
17 |             resp = get_response('https://www.kuwo.cn/favicon.ico?v=1')
18 |         kw_token = get_cookie('www.kuwo.cn', '/', 'kw_token').value
19 |         params = {
20 |             'mid': self.mid,
21 |             'httpsStatus': 1,
22 |             'reqId': get_random_uuid()
23 |         }
24 |         data = get_response('https://www.kuwo.cn/api/www/music/musicInfo',
25 |                             headers={'csrf': kw_token},
26 |                             params=params).json()
27 |         assert data.get('code') == 200, data['message']
28 |         data = data['data']
29 | 
30 |         pay = data['isListenFee']
31 |         if pay:
32 |             if self.is_list:  # just skip pay when extract from list
33 |                 self.logger.warning('Skip pay song: %s', self.mid)
34 |                 return
35 |             raise AssertionError('Pay song: %s' % self.mid)
36 | 
37 |         albumpic = data['albumpic']
38 |         album = data['album']
39 |         title = data['name']
40 |         info.title = album in title and title or '{title} - {album}'.format(**vars())
41 |         info.artist = data['artist']
42 |         info.album = data['album']
43 |         info.duration = data['duration']
44 |         info.add_comment(data['albuminfo'])
45 | 
46 |         params['type'] = 'music'
47 |         data = get_response('https://www.kuwo.cn/api/v1/www/music/playUrl',
48 |                             params=params).json()
49 |         assert data.get('code') == 200, data['message']
50 | 
51 |         url = data['data']['url']
52 |         info.streams['current'] = {
53 |             'container': 'mp3',
54 |             'profile': 'current',
55 |             'src': [(albumpic, url)]
56 |         }
57 |         return info
58 | 
59 |     def list_only(self):
60 |         return 'playlist_detail' in self.url
61 | 
62 |     def prepare_list(self):
63 |         install_cookie()
64 |         html = get_content(self.url)
65 |         return matchall(html, 'href="/play_detail/(\d+)"')
66 | 
67 | site = Kuwo()
68 | 


--------------------------------------------------------------------------------
/ykdl/extractors/laifeng.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | import datetime
 6 | 
 7 | 
 8 | class Laifeng(Extractor):
 9 |     name = 'laifeng (来疯直播)'
10 | 
11 |     def prepare(self):
12 |         info = MediaInfo(self.name, True)
13 | 
14 |         html = get_content(self.url)
15 |         info.artist = match1(html, 'anchorName:\s*\'([^\']+)',
16 |                                    '"anchorName":\s*"([^"]+)"')
17 |         info.title = info.artist + '的直播房间'
18 | 
19 |         Alias = match1(html, 'initAlias:\'([^\']+)' ,'"ln":\s*"([^"]+)"')
20 |         Token = match1(html, 'initToken: \'([^\']+)', '"tk":\s*"([^"]+)"')
21 |         ts = datetime.datetime.utcnow().isoformat().split('.')[0] + 'Z'
22 |         data = get_response('http://lapi.lcloud.laifeng.com/Play',
23 |                             params={
24 |                                 'AppId': 101,
25 |                                 'StreamName': Alias,
26 |                                 'Action': 'Schedule',
27 |                                 'Token': Token,
28 |                                 'Version': 2.0,
29 |                                 'CallerVersion': 3.3,
30 |                                 'Caller': 'flash',
31 |                                 'Format': 'HttpFlv',
32 |                                 'Timestamp': ts,
33 |                                 'rd': random.randint(10000, 99999),
34 |                             }).json()
35 |         assert data['Code'] == 'Success', data['Message']
36 | 
37 |         stream_url = data['HttpFlv'][0]['Url']
38 |         info.streams['current'] = {
39 |             'container': 'flv',
40 |             'profile': 'current',
41 |             'src' : [stream_url],
42 |             'size': Infinity
43 |         }
44 |         return info
45 | 
46 | site = Laifeng()
47 | 


--------------------------------------------------------------------------------
/ykdl/extractors/le.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
  6 | def calcTimeKey(t):
  7 |     ror = lambda val, r_bits: ((val & (2**32-1)) >> r_bits%32) | \
  8 |                               (val << (32-(r_bits%32)) & (2**32-1))
  9 |     magic = 185025305
 10 |     return ror(t, magic % 17) ^ magic
 11 | 
 12 | def decode_m3u8(data):
 13 |     version = data[0:5]
 14 |     if version.lower() == b'vc_01':
 15 |         #get real m3u8
 16 |         loc2 = bytearray(data[5:])
 17 |         length = len(loc2)
 18 |         loc4 = [0]*(2*length)
 19 |         for i in range(length):
 20 |             loc4[2*i] = loc2[i] >> 4
 21 |             loc4[2*i+1]= loc2[i] & 15;
 22 |         loc6 = loc4[len(loc4)-11:]+loc4[:len(loc4)-11]
 23 |         loc7 = bytearray(length)
 24 |         for i in range(length):
 25 |             loc7[i] = (loc6[2 * i] << 4) +loc6[2*i+1]
 26 |         return loc7
 27 |     else:
 28 |         # directly return
 29 |         return data
 30 | 
 31 | headers = {
 32 |     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) '
 33 |                   'AppleWebKit/603.1.30 (KHTML, like Gecko) '
 34 |                   'Version/10.1 Safari/603.1.30'
 35 | }
 36 | 
 37 | class Letv(Extractor):
 38 |     name = '乐视视频 (Letv)'
 39 | 
 40 |     stream_2_id_profile = {
 41 |         '1080p': ['BD', '1080P'],
 42 |          '1300': ['TD', '超清'],
 43 |          '1000': ['HD', '高清'],
 44 |          '720p': ['SD', '标清'],
 45 |           '350': ['LD', '流畅']
 46 |     }
 47 | 
 48 |     __STREAM_TEMP__ = []
 49 | 
 50 |     def prepare_mid(self):
 51 |         return match1(self.url, '/vplay/(\d+).html', '#record/(\d+)')
 52 | 
 53 |     def prepare(self):
 54 |         info = MediaInfo(self.name)
 55 |         stream_temp = {st: None for st in self.stream_2_id_profile.keys()}
 56 |         self.__STREAM_TEMP__.append(stream_temp)
 57 | 
 58 |         #normal process
 59 |         data = get_response('https://player-pc.le.com/mms/out/video/playJson',
 60 |                             params={
 61 |                                 'id': self.mid,
 62 |                                 'platid': 1,
 63 |                                 'splatid': 105,
 64 |                                 'format': 1,
 65 |                                 'tkey': calcTimeKey(int(time.time())),
 66 |                                 'domain': 'www.le.com',
 67 |                                 'region': 'cn',
 68 |                                 'source': 1000,
 69 |                                 'accessyx': 1
 70 |                             },
 71 |                             headers=headers).json()['msgs']['playurl']
 72 | 
 73 |         info.title = data['title']
 74 |         info.duration = data['duration']
 75 |         for stream, sdp in data['dispatch'].items():
 76 |             s_url = data['domain'][0] + sdp[0]
 77 |             data2 = get_response(s_url,
 78 |                                  params={
 79 |                                      'm3v': 1,
 80 |                                      'termid': 1,
 81 |                                      'format': 1,
 82 |                                      'hwtype': 'un',
 83 |                                      'ostype': 'MacOS10.12.4',
 84 |                                      'p1': 1,
 85 |                                      'p2': 10,
 86 |                                      'p3': '-',
 87 |                                      'expect': '3',
 88 |                                      'tn': random.random(),
 89 |                                      'vid': self.mid,
 90 |                                      'uuid': hash.sha1(s_url) + '_0',
 91 |                                      'tss': 'ios'
 92 |                                  },
 93 |                                  headers=headers).json()
 94 | 
 95 |             # hold on ! more things to do
 96 |             # to decode m3u8 (encoded)
 97 |             m3u8 = get_content(data2['location'],
 98 |                                params={
 99 |                                    'r': int(time.time() * 1000),
100 |                                    'appid': 500
101 |                                },
102 |                                headers=headers, encoding=decode_m3u8)
103 |             stream_id, stream_profile = self.stream_2_id_profile[stream]
104 |             info.streams[stream_id] = {
105 |                 'container': 'm3u8',
106 |                 'profile': stream_profile
107 |             }
108 |             stream_temp[stream] = NamedTemporaryFile(mode='w+b', suffix='.m3u8')
109 |             stream_temp[stream].write(m3u8)
110 |             info.streams[stream_id]['src'] = [stream_temp[stream].name]
111 |             stream_temp[stream].flush()
112 | 
113 |         return info
114 | 
115 |     def list_only(self):
116 |         return bool(match1(self.url, '/tv/\d+.html'))
117 | 
118 |     def prepare_list(self):
119 |         if self.list_only():
120 |             mid = None
121 |         else:
122 |             mid = self.mid
123 |             html = get_content(self.url)
124 |             pid = match1(html, r'\bpid: ?(\d+)')
125 |             if pid is None:
126 |                  return
127 |             self.url = 'https://www.le.com/tv/{pid}.html'.format(**vars())
128 | 
129 |         html = get_content(self.url)
130 |         vids = matchall(html, '/vplay/(\d+).html"')
131 |         mids = []
132 |         for vid in vids:
133 |             if vid in mids:
134 |                 continue
135 |             mids.append(vid)
136 |         self.set_index(mid, mids)
137 |         return mids
138 | 
139 | site = Letv()
140 | 


--------------------------------------------------------------------------------
/ykdl/extractors/lizhi.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Lizhi(Extractor):
 7 |     name = 'Lizhi FM (荔枝电台)'
 8 | 
 9 |     def prepare_mid(self):
10 |         pass
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 | 
15 |         html = get_content(self.url)
16 |         self.mid, info.artist, _, info.title = matchm(html,
17 |                 'data-hidden-ph\s?=\s?"(.+?)" '
18 |                 'data-user-name\s?=\s?"(.+?)" '
19 |                 'data-radio-name\s?=\s?"(.+?)" '
20 |                 'data-title\s?=\s?"(.+?)"')
21 |         data = get_response('https://www.lizhi.fm/hidden_ph/{self.mid}'
22 |                             .format(**vars())).json()
23 |         assert data['rcode'] == 0, data['msg']
24 | 
25 |         info.streams['current'] = {
26 |             'container': 'mp3',
27 |             'profile': 'current',
28 |             'src': [data['data']['url']]
29 |         }
30 |         return info
31 | 
32 |     def list_only(self):
33 |         return 'user' in self.url
34 | 
35 |     def prepare_list(self):
36 |         html = get_content(self.url)
37 |         fm = match1(html, 'class="user-info-name">FM(\d+)')
38 |         audio = matchall(html, 'href="(/{fm}/\d+)"'.format(**vars()))
39 |         audio.reverse()
40 |         return ['https://www.lizhi.fm' + a for a in audio]
41 | 
42 | site = Lizhi()
43 | 


--------------------------------------------------------------------------------
/ykdl/extractors/longzhu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class LongzhuLive(Extractor):
 7 |     name = 'Longzhu Live (龙珠直播)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(get_content(self.url), '(?i)"roomid":(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name, True)
14 | 
15 |         html = get_content(self.url)
16 |         info.title = match1(html, '"title":"([^"]+)', '<title>([^>]+)<')
17 |         info.artist = match1(html, '"Name":"([^"]+)')
18 | 
19 |         data = get_response('http://livestream.longzhu.com/live/getlivePlayurl',
20 |                             params={
21 |                                 'roomId': self.mid,
22 |                                 'utmSr': '',
23 |                                 'platform': 'h5',
24 |                                 'device': 'pc'
25 |                             }).json()['playLines']
26 |         assert data, 'Live is offline!!'
27 | 
28 |         for i in data[0]['urls']:
29 |             ext = i['ext']
30 |             info.streams[ext] = {
31 |                 'container': ext,
32 |                 'profile': i['description'],
33 |                 'src': [i['securityUrl']]
34 |             }
35 | 
36 |         return info
37 | 
38 | site = LongzhuLive()
39 | 


--------------------------------------------------------------------------------
/ykdl/extractors/mgtv.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
  6 | encode_translation = bytes.maketrans(b'+/=', b'_~-')
  7 | decode_translation = bytes.maketrans(b'_~-', b'+/=')
  8 | 
  9 | def encode_tk2(s):
 10 |     s = bytearray(base64.b64encode(s.encode()).translate(encode_translation))
 11 |     s.reverse()
 12 |     return s.decode()
 13 | 
 14 | def decode_tk2(s):
 15 |     if not isinstance(s, bytes):
 16 |         s = s.encode()
 17 |     s = bytearray(s)
 18 |     s.reverse()
 19 |     s = base64.b64decode(s.translate(decode_translation))
 20 |     return s.decode()
 21 | 
 22 | def generate_tk2(did):
 23 |     s = 'did={}|pno=1030|ver=0.3.0301|clit={}'.format(did, int(time.time()))
 24 |     return encode_tk2(s)
 25 | 
 26 | class Hunantv(Extractor):
 27 |     name = '芒果TV (HunanTV)'
 28 | 
 29 |     profile_2_id = {
 30 |       '复刻版': 'BD',
 31 |         '蓝光': 'BD',
 32 |         '超清': 'TD',
 33 |         '高清': 'HD',
 34 |         '标清': 'SD'
 35 |     }
 36 | 
 37 |     def prepare_mid(self):
 38 |         mid = match1(self.url, 'com/[bl]/\d+/(\d+).html',
 39 |                                'com/s/(\d+).html')
 40 |         if mid is None:
 41 |             html = get_content(self.url)
 42 |             if match1(self.url, 'com/h/(\d+).html'):
 43 |                 assert JSEngine, 'No JS Interpreter found!!!'
 44 |                 js_ctx = JSEngine()
 45 |                 js = match1(html, '<script>window.__NUXT__=(.+);</script>')
 46 |                 data = js_ctx.eval(js)
 47 |                 mid = match1(data, "PartId': '(\d+)'")
 48 |             else:
 49 |                 mid = match1(html,
 50 |                              'window.location = "/b/\d+/(\d+).html"',
 51 |                             r'routePath:"\\u002Fl\\u002F\d+\\u002F(\d+).html"',
 52 |                              'vid[=:]\D?(\d+)')
 53 |         return mid
 54 | 
 55 |     def prepare(self):
 56 |         info = MediaInfo(self.name)
 57 |         info.extra.referer = self.url
 58 |         install_cookie()
 59 | 
 60 |         did = get_random_uuid()
 61 |         tk2 = generate_tk2(did)
 62 |         params = {
 63 |             'tk2': tk2,
 64 |             'video_id': self.mid,
 65 |             'type': 'pch5'
 66 |         }
 67 |         data = get_response('https://pcweb.api.mgtv.com/player/video',
 68 |                             params=params).json()
 69 |         assert data['code'] == 200, ('[failed] code: {}, msg: {}'
 70 |                                      .format(data['code'], data['msg']))
 71 |         assert data['data'], '[Failed] Video info not found.'
 72 |         data = data['data']
 73 | 
 74 |         info.title = data['info']['title'] + ' ' + data['info']['desc']
 75 | 
 76 |         params['pm2'] = data['atc']['pm2']
 77 |         data = get_response('https://pcweb.api.mgtv.com/player/getSource',
 78 |                             params=params).json()
 79 |         assert data['code'] == 200, ('[failed] code: {}, msg: {}'
 80 |                                      .format(data['code'], data['msg']))
 81 |         assert data['data'], '[Failed] Video source not found.'
 82 |         data = data['data']
 83 | 
 84 |         domain = data['stream_domain'][0]
 85 |         for lstream in data['stream']:
 86 |             lurl = lstream['url']
 87 |             if lurl:
 88 |                 url = get_response(domain + lurl,
 89 |                                    params={'did': did}).json()['info']
 90 |                 stream_profile = lstream['name']
 91 |                 stream_id = self.profile_2_id[stream_profile]
 92 |                 info.streams[stream_id] = {
 93 |                     'container': 'm3u8',
 94 |                     'profile': stream_profile,
 95 |                     'src': [url]
 96 |                 }
 97 | 
 98 |         return info
 99 | 
100 | site = Hunantv()
101 | 


--------------------------------------------------------------------------------
/ykdl/extractors/miaopai.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | # BROKEN
 7 | 
 8 | api_info1 = 'https://n.miaopai.com/api/aj_media/info.json?smid={}&appid=530&_cb={}'
 9 | api_info2 = 'http://api.miaopai.com/m/v2_channel.json?fillType=259&scid={}&vend='
10 | api_stream = 'http://gslb.miaopai.com/stream/{}.json?vend='
11 | 
12 | class Miaopai(Extractor):
13 | 
14 |     name = '秒拍 (Miaopai)'
15 | 
16 |     def prepare_mid(self):
17 |         mid = match1(self.url, '/media/([^\./]+)')
18 |         if mid is None:
19 |             html = get_content(self.url)
20 |             mid = match1(html, 's[cm]id ?= ?[\'"]([^\'"]+)[\'"]')
21 |         return mid
22 | 
23 |     def prepare(self):
24 |         info = MediaInfo(self.name)
25 |         title = None
26 | 
27 |         if 'show' in self.url:
28 |             new_url = get_location(self.url)
29 |             if new_url != self.url:
30 |                 self.logger.debug('redirect to' + new_url)
31 |                 self.url = new_url
32 | 
33 |         if len(self.mid) > 24:
34 |             add_header('Referer', self.url)
35 |             cb = '_jsonp{}'.format(get_random_str(10).lower())
36 |             data = get_response(api_info1.format(self.mid, cb)).json()
37 |             data = json.loads(json_html[json_html.find('{'):-2])
38 |             assert data['code'] == 200, data['msg']
39 | 
40 |             data = data['data']
41 |             title = data['description']
42 |             url = data['meta_data'][0]['play_urls']['m']
43 |             _, ext, _ = url_info(url)
44 |         
45 |         else:
46 |             try:
47 |                 data = get_response(api_info2.format(self.mid)).json()
48 |                 assert data['status'] == 200, data['msg']
49 | 
50 |                 data = data['result']
51 |                 title = data['ext']['t']
52 |                 scid = data['scid'] or self.mid
53 |                 ext = data['stream']['and']
54 |                 base = data['stream']['base']
55 |                 vend = data['stream']['vend']
56 |                 url = '{base}{scid}.{ext}?vend={vend}'.format(**vars())
57 |             except:
58 |                 # fallback
59 |                 data = get_response(api_stream.format(self.mid)).json()
60 |                 assert data['status'] == 200, data['msg']
61 | 
62 |                 data = data['result'][0]
63 |                 ext = None
64 |                 scheme = data['scheme']
65 |                 host = data['host']
66 |                 path = data['path']
67 |                 sign = data['sign']
68 |                 url = '{scheme}{host}{path}{sign}'.format(**vars())
69 | 
70 |         if not title:
71 |             html = get_content(self.url)
72 |             title = match1(html, '<meta name="description" content="([^"]+)">')
73 |         info.title = title
74 | 
75 |         info.streams['current'] = {
76 |             'container': ext or 'mp4',
77 |             'profile': 'current',
78 |             'src': [url]
79 |         }
80 |         return info
81 | 
82 |     def prepare_list(self):
83 |         html = get_content(self.url)
84 |         video_list = match1(html, 'video_list=\[([^\]]+)')
85 |         return matchall(video_list, '"([^",]+)')
86 | 
87 | site = Miaopai()
88 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'v.163.com/movie/' in url:
 6 |         url = url.replace('v.163', 'open.163')
 7 |     if 'cc.163' in url:
 8 |         from . import livecc as s
 9 |     elif 'live.163' in url:
10 |         from . import live as s
11 |     elif 'open.163' in url or '/opencourse/' in url:
12 |         from . import openc as s
13 |     elif 'music.163' in url:
14 |         from . import music as s
15 |         return s.get_extractor(url)
16 |     elif '3g.163' in url:
17 |         from . import m3g as s
18 |     else:
19 |         from . import video as s
20 | 
21 |     return s.site, url
22 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/live.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class NeteaseLive(Extractor):
 7 |     name = '网易直播 (163)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(self.url, 'room/(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name, True)
14 | 
15 |         data = get_response(
16 |             'https://data.live.126.net/liveAll/{self.mid}.json'.format(**vars()),
17 |             params={'tt': int(time.time() * 1000)}
18 |         ).json()
19 |         assert 'liveVideoUrl' in data, 'live video is offline'
20 | 
21 |         info.title = data['roomName']
22 |         try:
23 |             info.artist = data['sourceinfo']['tname']
24 |         except KeyError:
25 |             pass
26 |         info.duration = duration = data.get('duration')
27 |         info.add_comment = data['channal']['name']
28 | 
29 |         url = data['liveVideoUrl']
30 |         info.streams['current'] = {
31 |             'container': url.split('.')[-1],
32 |             'profile': 'current',
33 |             'src': [url],
34 |             not duration and 'size': Infinity
35 |         }
36 |         return info
37 | 
38 | site = NeteaseLive()
39 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/livecc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class NeteaseLive(Extractor):
 7 |     name = '网易CC直播 (163)'
 8 | 
 9 |     profile_2_id = {
10 |         '原画': 'OG',
11 |         '蓝光': 'BD',
12 |         '超清': 'TD',
13 |         '高清': 'HD',
14 |         '标清': 'SD',
15 |     }
16 | 
17 |     quality_2_profile = {
18 |      'blueray': '蓝光',
19 |        'ultra': '超清',
20 |         'high': '高清',
21 |     'standard': '标清',
22 |     }
23 | 
24 |     def prepare_mid(self):
25 |         return match1(self.url, '\D/(\d+)/?$')
26 | 
27 |     def prepare(self):
28 |         info = MediaInfo(self.name, True)
29 | 
30 |         html = get_content(self.url, headers={'Referer': 'https://cc.163.com/'})
31 |         data = match1(html, '<script id="__NEXT_DATA__".*?>(.*?)</script>')
32 |         #self.logger.debug('data:\n%s', data)  # too long
33 |         data = json.loads(data)
34 | 
35 |         def get_live_info(vbr=0):
36 |             params = vbr and {'vbr': vbr} or None
37 |             data = get_response('http://cgi.v.cc.163.com/video_play_url/{self.mid}'
38 |                                 .format(**vars()), params=params).json()
39 | 
40 |             stream_profile = data['vbrname_mapping'][data['pc_vbr_sel']]
41 |             stream_id = self.profile_2_id[stream_profile]
42 |             info.streams[stream_id] = {
43 |                 'container': 'flv',
44 |                 'profile': stream_profile,
45 |                 'src' : [data['videourl']],
46 |                 'size': Infinity
47 |             }
48 | 
49 |             if vbr == 0:
50 |                 vbr_sel = data['vbr_sel']
51 |                 for vbr in data['vbr_list']:
52 |                     if vbr != vbr_sel:
53 |                         get_live_info(vbr)
54 | 
55 |         try:
56 |             # project, select first living room
57 |             data = data['props']['pageProps']['data']
58 |             rooms = data['module_infos'][0]['content']
59 | 
60 |         except KeyError:
61 |             data = data['props']['pageProps']['roomInfoInitData']
62 |             assert 'micfirst' in data, 'unsupported live!'
63 | 
64 |             info.title = data['live']['title']
65 |             info.artist = data['micfirst']['nickname']
66 | 
67 |             try:
68 |                 streams = data['live']['quickplay']['resolution']
69 |             except KeyError:
70 |                 get_live_info()
71 |             else:
72 |                 for quality, stream in streams.items():
73 |                     stream_profile = self.quality_2_profile[quality]
74 |                     stream_id = self.profile_2_id[stream_profile]
75 |                     cdn = stream['cdn']
76 |                     cdn.pop('wy', None)  # UDP
77 |                     url = random.choice(list(cdn.values()))
78 |                     info.streams[stream_id] = {
79 |                         'container': 'flv',
80 |                         'profile': stream_profile,
81 |                         'src' : [url],
82 |                         'size': Infinity
83 |                     }
84 | 
85 |         else:
86 |             for room in rooms:
87 |                 if room['is_living']:
88 |                     self.mid = room['ccid']
89 |                     info.artist = room['name']
90 |                     break
91 |             info.title = data['share_title']
92 |             get_live_info()
93 | 
94 |         return info
95 | 
96 | site = NeteaseLive()
97 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/m3g.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class m3g(SimpleExtractor):
 7 |     name = '网易手机网 (163 3g)'
 8 | 
 9 |     def init(self):
10 |         self.url_patterns = ['"contentUrl":"([^"]+)"', '<video\s+data-src="([^"]+)"']
11 |         self.title_pattern = 'class="title">(.+?)</'
12 | 
13 |     def get_url(self):
14 |         if self.url_patterns:
15 |             v_url = []
16 |             for url in matchall(self.html, *self.url_patterns):
17 |                 if url[:2] == '//':
18 |                     url = 'http:' + url
19 |                 if url not in v_url:
20 |                     v_url.append(url)
21 |             self.v_url = v_url
22 | 
23 | site = m3g()
24 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/music/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     add_header('Referer', 'http://music.163.com/')
 6 | 
 7 |     if '/program' in url:
 8 |         from . import program as s
 9 |     elif '/dj' in url:
10 |         from . import program as s
11 |     elif '/mv' in url:
12 |         from . import mv as s
13 |     else:
14 |         from . import music as s
15 | 
16 |     return s.site, url
17 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/music/music.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ..._common import *
 4 | from .musicbase import NeteaseMusicBase
 5 | 
 6 | 
 7 | class NeteaseMusic(NeteaseMusicBase):
 8 |     name = 'Netease Music (网易云音乐)'
 9 |     api_url = 'http://music.163.com/api/song/detail/'
10 | 
11 |     def get_music(self, data):
12 |         return data['songs'][0]
13 | 
14 |     def prepare_list(self):
15 |         params = {
16 |             'id': self.mid,
17 |             'csrf_token': ''
18 |         }
19 |         if 'album' in self.url:
20 |             data =  get_response('http://music.163.com/api/album/' + self.mid,
21 |                                  params=params).json()
22 |             playlist = data['album']['songs']
23 |         elif 'playlist' in self.url or 'toplist' in self.url:
24 |             data =  get_response('http://music.163.com/api/playlist/detail',
25 |                                  params=params).json()
26 |             playlist = data['result']['tracks']
27 |         elif 'artist' in self.url:
28 |             data =  get_response('http://music.163.com/api/artist/' + self.mid,
29 |                                  params=params).json()
30 |             playlist = data['hotSongs']
31 | 
32 |         mids = [p['id'] for p in playlist]
33 |         self.set_index(self.mid, mids)
34 |         return mids
35 | 
36 | site = NeteaseMusic()
37 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/music/musicbase.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ..._common import *
 4 | 
 5 | from Crypto.Cipher import AES
 6 | 
 7 | 
 8 | #consts here
 9 | first_key = '0CoJUm6Qyw8W8jud'
10 | iv = '0102030405060708'
11 | 
def pksc7_padding(string):
    """Pad *string* to a multiple of 16 characters, PKCS#7 style.

    Each appended character has a code point equal to the number of
    characters added; a string already on a 16-character boundary gets a
    full extra block of 16.
    """
    aes_block_size = 16
    pad_len = aes_block_size - len(string) % 16
    return string + chr(pad_len) * pad_len
16 | 
def make_json_data(url_id):
    """Return the compact JSON request body for song id *url_id*.

    The body has a fixed bitrate (``br``), an empty CSRF token and the id
    wrapped in a bracketed string (``ids``).
    """
    request = {
        'br': 128000,
        'csrf_token': '',  # in the cookie
        'ids': '[{}]'.format(url_id),
    }
    # Compact separators: no spaces after ',' and ':'.
    return json.dumps(request, separators=(',', ':'))
23 | 
def RSA_string(input_str):
    """Return ``input_str ** 65537 mod N`` as 256 zero-padded hex digits.

    The string is read as a little-endian base-256 number: the code point
    of the character at index ``i`` is weighted by ``256 ** i``.
    """
    modulus = 157794750267131502212476817800345498121872783333389747424011531025366277535262539913701806290766479189477533597854989606803194253978660329941980786072432806427833685472618792592200595694346872951301770580765135349259590167490536138082469680638514416594216629258349130257685001248172188325316586707301643237607
    public_exponent = 65537

    # Little-endian packing: the first character is the least significant.
    plain = sum(ord(ch) << (8 * pos) for pos, ch in enumerate(input_str))
    cipher = pow(plain, public_exponent, modulus)
    return '{:x}'.format(cipher).rjust(256, '0')
36 | 
def AES_128_CBC_b64_wrapper(data, key, iv):
    """AES-CBC encrypt *data* and return the ciphertext as base64 text.

    *data*, *key* and *iv* are strings; *data* is padded with
    ``pksc7_padding`` before being encoded and encrypted.
    """
    cipher = AES.new(key.encode(), AES.MODE_CBC, iv.encode())
    padded = pksc7_padding(data).encode()
    encrypted = cipher.encrypt(padded)
    return base64.b64encode(encrypted).decode()
42 | 
def netease_req(ids='468490608', snd_key=None, encSecKey=None):
    """Build the encrypted form payload for the player-url API.

    The JSON body for *ids* is AES-encrypted twice: first with the fixed
    module key, then with *snd_key*. *encSecKey* is the RSA-encrypted form
    of *snd_key*.

    When *snd_key* is omitted a random one is generated and *encSecKey* is
    derived from it. When only *snd_key* is given, *encSecKey* is derived
    from it as well, rather than being sent as None.

    Returns a dict with the keys ``params`` and ``encSecKey``.
    """
    data = make_json_data(ids)
    if snd_key is None:
        snd_key = get_random_str(16, 'snd_key')
        encSecKey = RSA_string(snd_key)
    elif encSecKey is None:
        # Caller supplied only the key: compute the matching RSA envelope.
        encSecKey = RSA_string(snd_key)
    first_pass = AES_128_CBC_b64_wrapper(data, first_key, iv)
    second_pass = AES_128_CBC_b64_wrapper(first_pass, snd_key, iv)

    return {
        'params': second_pass,
        'encSecKey': encSecKey,
    }
56 | 
class NeteaseMusicBase(Extractor):
    """Shared logic of the NetEase Cloud Music extractors.

    Subclasses provide ``name``, ``api_url`` and ``get_music(data)``, which
    picks the song entry out of the detail API response.
    """

    def prepare_mid(self):
        """Return the id from an ``id=`` query parameter or a ``song/`` path."""
        return match1(self.url, r'\bid=(\w+)', r'song/(\d+)')

    def prepare(self):
        """Fetch the song details and its play URL; return the MediaInfo."""
        info = MediaInfo(self.name)

        query = {
            'id': self.mid,
            'ids': self.mid,
            'csrf_token': ''
        }
        detail = self.get_music(get_response(self.api_url, params=query).json())
        self.logger.debug('data:\n%s', detail)

        info.title = detail['name']
        info.artist = detail['artists'][0]['name']

        # The play-url API takes an encrypted payload keyed by a random string.
        snd_key = get_random_str(16, 'snd_key')
        payload = netease_req(detail['id'], snd_key, RSA_string(snd_key))
        response = get_response(
            'http://music.163.com/weapi/song/enhance/player/url?csrf_token=',
            data=payload)
        media = response.json()['data'][0]
        self.logger.debug('mp3 data:\n%s', media)

        info.streams['current'] = {
            'container': media['type'],
            'profile': 'current',
            'src' : [media['url']],
            'size': media['size']
        }

        return info
93 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/music/mv.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ..._common import *
 4 | 
 5 | 
 6 | class NeteaseMv(Extractor):
 7 |     name = 'Netease MV (网易音乐 MV)'
 8 | 
 9 |     resolution_2_id_profile = {
10 |         '1080': ['BD', '1080P'],
11 |          '720': ['TD', '超清'],
12 |          '480': ['HD', '高清'],
13 |          '240': ['SD', '标清']
14 |     }
15 | 
16 |     def prepare_mid(self):
17 |         return match1(self.url, '\?id=(.*)', 'mv/(\d+)')
18 | 
19 |     def prepare(self):
20 |         info = MediaInfo(self.name)
21 | 
22 |         data = get_response('http://music.163.com/api/mv/detail/',
23 |                           params={
24 |                               'id': self.mid,
25 |                              'ids': self.mid,
26 |                       'csrf_token': ''
27 |                           }).json()['data']
28 | 
29 |         info.title = data['name']
30 |         info.artist = data['artistName']
31 |         for resolution in self.resolution_2_id_profile.keys():
32 |             if resolution in data['brs']:
33 |                 stream_id, stream_profile = self.resolution_2_id_profile[id]
34 |                 info.streams[stream_id] = {
35 |                     'container': 'mp4',
36 |                     'profile': stream_profile,
37 |                     'src': [data['brs'][id]]
38 |                 }
39 | 
40 |         return info
41 | 
42 | site = NeteaseMv()
43 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/music/program.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ..._common import *
 4 | from .musicbase import NeteaseMusicBase
 5 | 
 6 | 
 7 | class NeteaseDj(NeteaseMusicBase):
 8 |     name = 'Netease Dj (网易电台)'
 9 |     api_url = 'http://music.163.com/api/dj/program/detail/'
10 | 
11 |     def get_music(self, data):
12 |         return data['program']['mainSong']
13 | 
14 |     def prepare_list(self):
15 |         if 'djradio' in self.url:
16 |             data =  get_response(
17 |                         'http://music.163.com/api/dj/program/byradio/',
18 |                         params={
19 |                             'radioId': self.mid,
20 |                             'ids': self.mid,
21 |                             'csrf_token': '',
22 |                         }).json()
23 |             mids = [p['id'] for p in data['programs']]
24 |             self.set_index(self.mid, mids)
25 |             return mids
26 | 
27 | site = NeteaseDj()
28 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/openc.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from .._common import *
  4 | 
  5 | 
# Fail at import time when no JS engine is available: parse_html() below
# evaluates the page's embedded JavaScript with JSEngine.
assert JSEngine, "No JS Interpreter found, can't extract netease openCourse!"

class OpenC(Extractor):
    """Extractor for NetEase open courses (open.163.com).

    ``self.mid`` is a ``(pid, mid)`` pair; see prepare_mid() and format_mid().
    """
    name = '网易公开课 (163 openCourse)'

    # Each entry: [stream id, name fragment used in the movie's
    # '<ext><fragment>Url' / '<ext><fragment>Size' keys, display profile].
    sopported_stream_types = [
        ['TD', 'Shd', '超清'],
        ['HD',  'Hd', '高清'],
        ['SD',  'Sd', '标清']
    ]
    # Subtitle display name -> language code.
    name2lang = {
        '中文': 'zh',
        '英文': 'en'
    }

    def list_only(self):
        """Return True when the second element of ``self.mid`` is None."""
        return self.mid[1] is None

    @staticmethod
    def format_mid(mid):
        """Normalize *mid* to a 2-tuple whose first element is truthy.

        A non-tuple value becomes ``(mid, None)``; longer tuples are
        truncated to two elements. Raises AssertionError otherwise.
        """
        if not isinstance(mid, tuple):
            mid = mid, None
        mid = mid[:2]
        assert len(mid) == 2 and mid[0]
        return mid

    def prepare_mid(self):
        """Return the URL's ``pid`` and ``mid`` query parameters as a pair."""
        return match1(self.url, r'\bpid=(\w+)'), match1(self.url, r'\bmid=(\w+)')

    @functools.cache
    def parse_html(self, url):
        """Fetch *url* and return its evaluated ``window.__NUXT__`` data.

        Results are memoized by functools.cache, keyed on (self, url).
        """
        html = get_content(url)
        js = match1(html, 'window\.__NUXT__=(.+);</script>')
        data = JSEngine().eval(js)
        self.logger.debug('data: \n%s', data)
        return data

    def prepare_data(self):
        """Return the page data of the course identified by ``self.mid[0]``.

        When the first ``data`` entry carries a ``playUrl``, that URL
        replaces ``self.url``, ``self.mid`` is reset and the lookup restarts.
        """
        url = 'https://open.163.com/newview/movie/free?pid={}'.format(self.mid[0])
        data = self.parse_html(url)
        try:
            self.url = data['data'][0]['playUrl']
        except KeyError:
            # No redirect target: this is the page to extract from.
            return data
        else:
            # NOTE(review): the recursive call reads self.mid[0] right after
            # this reset, so the base class presumably re-derives mid from
            # the new self.url -- confirm against Extractor.
            self.mid = None
            return self.prepare_data()

    def prepare(self):
        """Build the MediaInfo of the lesson selected by ``self.mid[1]``.

        Returns None when the course has no lessons. Raises AssertionError
        when no lesson has the requested mid.
        """
        info = MediaInfo(self.name)

        data = self.prepare_data()
        moiveList = data['state']['movie']['moiveList']
        if not moiveList:
            return

        mid = self.mid[1]
        # Leaves `movie` bound to the matching lesson, or to the last one
        # when nothing matches (caught by the assert below).
        for movie in moiveList:
            if movie['mid'] == mid:
                break
        assert movie['mid'] == mid, "can't found mid %r" % mid

        title = data['data'][0]['title']
        # Text after the last occurrence of the course title in the lesson
        # title; the whole lesson title when the course title is absent.
        mtitle = movie['title'].rpartition(title)[-1]
        if mtitle:
            # Drop a leading part, up to the first separator found, when the
            # course title starts with that part.
            for sp in ['：', '】']:
                t1, _, t2 = mtitle.partition(sp)
                if title.startswith(t1):
                    mtitle = t2
                    break
        if mtitle:
            p = movie['pNumber']
            pc = len(moiveList)
            if pc > 1 and not mtitle[0].isdecimal() and str(p) not in mtitle:
                # pl = number of decimal digits of the lesson count; used to
                # zero-pad the lesson number put in front of the title.
                pl = 0
                while pc:
                    pl += 1
                    pc //= 10
                mtitle = ('{:0>%dd} {}' % pl).format(p, mtitle)
            title = '{title} - {mtitle}'.format(**vars())
        school_info = data['data'][0]
        school = school_info['school']
        director = school_info['director']
        # The literal string 'null' is treated like a missing director.
        if director and director != 'null':
            if director != school :
                director = '[{school}] {director}'.format(**vars())
        else:
            director = school
        if school not in title:
            title = '[{school}] {title}'.format(**vars())
        info.title = title
        info.artist = director

        # For each stream id, keep the first non-empty URL found, trying
        # mp4 before m3u8 and the plain key before its 'Orign' variant.
        for stream_id, tp, stream_profile in self.sopported_stream_types:
            for ext in ['mp4', 'm3u8']:
                for orig in ['', 'Orign']:
                    if stream_id in info.streams:
                        continue
                    url = movie['{ext}{tp}Url{orig}'.format(**vars())]
                    if not url:
                        continue
                    size = movie['{ext}{tp}Size{orig}'.format(**vars())]
                    info.streams[stream_id] = {
                        'container': ext,
                        'profile': stream_profile,
                        'src' : [url],
                        'size': size
                    }

        # nlang counts the unnamed subtitles seen so far: the first one is
        # named '中文', later ones take the movie's 'subtitle' value.
        nlang = 0
        for sub in movie['subList']:
            name = sub['subName']
            if not name:
                if nlang:
                    name = movie['subtitle']
                else:
                    name = '中文'
                nlang += 1
            # Raises KeyError for a name missing from name2lang.
            lang = self.name2lang[name]
            info.subtitles.append({
                'lang': lang,
                'name': name,
                'format': 'srt',
                'src' : sub['subUrl'],
                'size': sub['subSize']
            })

        return info

    def prepare_list(self):
        """Yield a ``(pid, mid)`` pair for every lesson of the course."""
        data = self.prepare_data()
        pid, mid = self.mid
        mids = [movie['mid'] for movie in data['state']['movie']['moiveList']]
        self.set_index(mid, mids)
        for mid in mids:
            yield pid, mid

site = OpenC()
144 | 


--------------------------------------------------------------------------------
/ykdl/extractors/netease/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class NeteaseVideo(Extractor):
 7 |     name = '网易视频 (163)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(self.url, '(\w+)\.html')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 | 
15 |         data = get_response('https://so.v.163.com/v6/video/videodetail.do',
16 |                             params={
17 |                                'vid': self.mid,
18 |                                'adapter': 1
19 |                             }).json()
20 |         assert data['code'] == 1, data['msg']
21 |         data = data['data']
22 | 
23 |         info.title = data['title']
24 |         info.artist = data.get('username')
25 |         info.add_comment(data['keywords'])
26 |         info.streams['current'] = {
27 |             'container': 'mp4',
28 |             'profile': 'current',
29 |             'src': [data['url']]
30 |         }
31 | 
32 |         return info
33 | 
34 | site = NeteaseVideo()
35 | 


--------------------------------------------------------------------------------
/ykdl/extractors/pps.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | def gsign(params):
 7 |     s = []
 8 |     for key in sorted(params.keys()):
 9 |         s.append('{}:{}'.format(key, params[key]))
10 |     s.append('w!ytDgy#lEXWoJmN4HPf')
11 |     return hash.sha1(''.join(s))
12 | 
def getlive(mid, rate='source'):
    """Request the stream info of anchor *mid* at bit-rate type *rate*.

    Posts the signed parameter set to the stream API and returns the
    decoded JSON response.
    """
    params = dict(
        type_id=1,
        vid=1,
        anchor_id=mid,
        app_key='show_web_h5',
        version='1.0.0',
        platform='1_10_101',
        time=int(time.time()),
        netstat='wifi',
        device_id=get_random_id(32, 'device'),
        bit_rate_type=rate,
        protocol=5,
    )
    # The signature covers every parameter set above.
    params['sign'] = gsign(params)
    response = get_response('https://m-glider-xiu.pps.tv/v2/stream/get.json',
                            data=params)
    return response.json()
30 | 
class PPS(Extractor):
    """Extractor for Qixiu (PPS) live rooms."""

    name = '奇秀（Qixiu)'

    # Bit-rate type of the stream API -> [stream id, display profile].
    rate_2_id_profile = {
        'source': ['TD', '超清'],
          'high': ['HD', '高清'],
        'smooth': ['SD', '标清']
    }

    def prepare_mid(self):
        """Return the ``user_id`` embedded in the room page."""
        html = get_content(self.url)
        return match1(html, '"user_id":"([^"]+)",')

    def prepare(self):
        """Read title and artist from the page, then collect the streams.

        Error messages returned by the stream API are logged at debug
        level; they do not abort the extraction.
        """
        info = MediaInfo(self.name, True)

        html = get_content(self.url)
        # The captured values are JSON string literals, quotes included.
        title = json.loads(match1(html, '"room_name":("[^"]*"),'))
        artist = json.loads(match1(html, '"nick_name":("[^"]+"),'))
        info.title = '{title} - {artist}'.format(**vars())
        info.artist = artist

        def get_live_info(rate='source'):
            """Add the stream for *rate*; return an error message or None.

            The 'source' call also fetches the other rates listed in the
            response and joins their error messages.
            """
            data = getlive(self.mid, rate)
            if data['code'] != 'A00000':
                return data.get('msg')

            data = data['data']
            url = data.get('https_flv') or data.get('flv') or data.get('rtmp')
            if url:
                url = url.replace('rtmp://', 'http://')
                # randrange() needs an int; a float such as 1e4 raises
                # TypeError on Python 3.12+.
                ran = random.randrange(10000)
                sep = '&' if '?' in url else '?'
                url = '{url}{sep}ran={ran}'.format(**vars())
                stream_id, stream_profile = self.rate_2_id_profile[rate]
                info.streams[stream_id] = {
                    'container': 'flv',
                    'profile': stream_profile,
                    'src' : [url],
                    'size': Infinity
                }

            error_msges = []
            if rate == 'source':
                rate_list = data['rate_list']
                if 'source' in rate_list:
                    rate_list.remove('source')
                    for other_rate in rate_list:
                        error_msg = get_live_info(other_rate)
                        if error_msg:
                            error_msges.append(error_msg)
            if error_msges:
                return ', '.join(error_msges)

        error_msg = get_live_info()
        if error_msg:
            self.logger.debug('error_msg:\n\t' + error_msg)

        return info

site = PPS()
92 | 


--------------------------------------------------------------------------------
/ykdl/extractors/pptv.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class PPTV(Extractor):
 7 |     # https://tv.pptv.com/
 8 |     name = 'PPTV (PP聚力)'
 9 | 
10 |     id_2_profile = {
11 |         'BD': '蓝光',
12 |         'TD': '超清',
13 |         'HD': '高清',
14 |         'SD': '高清',
15 |         'LD': '流畅'
16 |     }
17 | 
18 |     def prepare_mid(self):
19 |         html = get_content(self.url)
20 |         return match1(html, '"(?:c|ps)id":"?(\d+)')
21 | 
22 |     def prepare(self):
23 |         info = MediaInfo(self.name)
24 | 
25 |         #key = gen_key(int(time.time()) - 60)
26 |         data = get_response('https://web-play.pptv.com/webplay3-0-{self.mid}.xml'
27 |                             .format(**vars()),
28 |                             params={
29 |                                 'zone': 8,
30 |                                 'version': 4,
31 |                                 'username': '',
32 |                                 'ppi': '302c3333',
33 |                                 'type': 'ppbox.launcher',
34 |                                 'pageUrl': 'http://v.pptv.com',
35 |                                 'o': 0,
36 |                                 'referrer': '',
37 |                                 'kk': '',
38 |                                 'scver': 1,
39 |                                 'appplt': 'flp',
40 |                                 'appid': 'pptv.flashplayer.vod',
41 |                                 'appver': '3.4.3.3',
42 |                                 'nddp': 1
43 |                             }).xml()['root']
44 |         assert 'error' not in data, data['error'][0]['@message']
45 | 
46 |         info.title = data['channel'][0]['@nm']
47 |         for item, dt, drag in zip(data['channel'][0]['file'][0]['item'],
48 |                                   data['dt'],
49 |                                   data.get('dragdata') or data['drag']):
50 |             host = dt['sh']
51 |             rid = dt['@rid']
52 |             params = urlencode({
53 |                 #'key': key,  # it is now useless
54 |                 'k': unquote(dt['key'][0]['#text']),
55 |                 'fpp.ver': '1.3.0.23',
56 |                 'type': 'ppbox.launcher'
57 |             })
58 |             urls = []
59 |             for seg in drag['sgm']:
60 |                 no = seg['@no']
61 |                 urls.append('http://{host}/{no}/{rid}?{params}'.format(**vars()))
62 | 
63 |             stream_id = format_vps(item['@width'], item['@height'])[0]
64 |             stream_profile = self.id_2_profile[stream_id]
65 |             info.streams[stream_id] = {
66 |                 'container': 'mp4',
67 |                 'profile': stream_profile,
68 |                 'src' : urls,
69 |                 'size': int(item['@filesize'])
70 |             }
71 | 
72 |         return info
73 | 
74 | site = PPTV()
75 | 


--------------------------------------------------------------------------------
/ykdl/extractors/qq/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'live.qq' in url:
 6 |         from . import live as s
 7 |     elif 'egame.qq' in url:
 8 |         from . import egame as s
 9 |     else:
10 |         from . import video as s
11 | 
12 |     return s.site, url
13 | 


--------------------------------------------------------------------------------
/ykdl/extractors/qq/egame.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | assert JSEngine, "No JS Interpreter found, can't extract egame live!"
 7 | 
 8 | 
 9 | class QQEGame(Extractor):
10 |     name = 'QQ EGAME (企鹅电竟)'
11 | 
12 |     lv_2_id = {
13 |         10: 'BD10M',
14 |          8: 'BD8M',
15 |          6: 'BD6M',
16 |          4: 'BD4M',
17 |          3: 'TD',
18 |          2: 'HD',
19 |          1: 'SD',
20 |     }
21 | 
22 |     @staticmethod
23 |     def format_mid(mid):
24 |         mid = fullmatch(mid, '\d+')
25 |         assert mid
26 |         return mid
27 | 
28 |     def prepare_mid(self):
29 |         return match1(self.url, '/(\d+)')
30 | 
31 |     def prepare(self):
32 |         info = MediaInfo(self.name, True)
33 | 
34 |         if self.url is None:
35 |             self.url = 'https://egame.qq.com/' + self.mid
36 |         html = get_content(self.url)
37 | 
38 |         js_nuxt = match1(html, '<script>window.__NUXT__=(.+?)</script>')
39 |         js_ctx = JSEngine()
40 |         data = js_ctx.eval(js_nuxt)
41 |         self.logger.debug('data:\n%s', data)
42 | 
43 |         state = data.get('state', {})
44 |         error = data.get('error') or state.get('errors')
45 |         assert not error, 'error: {error}!!'.format(**vars())
46 | 
47 |         liveInfo = state['live-info']['liveInfo']
48 |         videoInfo = liveInfo['videoInfo']
49 |         profileInfo = liveInfo['profileInfo']
50 |         assert profileInfo['isLive'], 'error: live show is not on line!!'
51 | 
52 |         title = videoInfo['title']
53 |         info.artist = artist = profileInfo['nickName']
54 |         info.title = '{title} - {artist}'.format(**vars())
55 | 
56 |         for s in videoInfo['streamInfos']:
57 |             stream_id = self.lv_2_id[s['levelType']]
58 |             info.streams[stream_id] = {
59 |                 'container': 'flv',
60 |                 'profile': s['desc'],
61 |                 'src' : [s['playUrl']],
62 |                 'size': Infinity
63 |             }
64 | 
65 |         return info
66 | 
67 | 
68 | site = QQEGame()
69 | 


--------------------------------------------------------------------------------
/ykdl/extractors/qq/live.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class QQLive(Extractor):
 7 |     name = 'QQ Live (企鹅直播)'
 8 | 
 9 |     def prepare_mid(self):
10 |         mid = match1(self.url, '/(\d+)')
11 |         if mid is None:
12 |             html = get_content(self.url)
13 |             mid = match1(html, '"room_id":(\d+)')
14 |         return mid
15 | 
16 |     def prepare(self):
17 |         info = MediaInfo(self.name, True)
18 | 
19 |         #from upstream!!
20 |         data = get_response(
21 |             'http://www.qie.tv/api/v1/room/{self.mid}'.format(**vars())).json()
22 |         assert data['error'] == 0, '{error}: {data}'.format(**data)
23 | 
24 |         livedata = data['data']
25 |         assert livedata['show_status'] == '1', 'live is offline!!'
26 | 
27 |         info.title = livedata['room_name']
28 |         info.artist = livedata['nickname']
29 | 
30 |         info.streams['current'] = {
31 |             'container': 'flv',
32 |             'profile': 'current',
33 |             'src' : ['{rtmp_url}/{rtmp_live}'.format(**livedata)],
34 |             'size': Infinity
35 |         }
36 |         return info
37 | 
38 | site = QQLive()
39 |             
40 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sina/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | def get_extractor(url):
 5 |     if 'open.sina' in url:
 6 |         from . import openc as s
 7 |     elif '.ivideo.sina' in url:
 8 |         from . import embed as s
 9 |     else:
10 |         from . import video as s
11 | 
12 |     return s.site, url
13 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sina/embed.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | class Embed(Extractor):
 7 |     name = '新浪视频 (sina)'
 8 | 
 9 |     def prepare(self):
10 |         info = MediaInfo(self.name)
11 | 
12 |         vid = match1(self.url, '/(\d+)\.mp4', 'vid=(\d+)')
13 |         url = 'https://ask.ivideo.sina.com.cn/v_play_ipad.php?' + urlencode({'vid': vid})
14 | 
15 |         info.streams['current'] = {
16 |             'container': 'mp4',
17 |             'profile': 'current',
18 |             'src': [url]
19 |         }
20 |         return info
21 | 
22 | site = Embed()
23 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sina/openc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | def get_k(vid, rand):
 7 |     t = str(int(time.time()) >> 6)
 8 |     s = '{vid}Z6prk18aWxP278cVAH{t}{rand}'.format(**vars())
 9 |     return hash.md5(s)[:16] + t
10 | 
class OpenC(Extractor):
    '''Extractor for Sina open courses; the media id is ``(course, lesson)``.'''

    name = 'Sina openCourse (新浪公开课)'

    def format_mid(self, mid):
        '''Normalize ``mid`` to a ``(course_id, lesson_id)`` pair.

        A bare value is treated as a course id without a lesson.
        Raises AssertionError when the course id is not all digits.
        '''
        # [0] course id
        # [1] lesson id
        if not isinstance(mid, tuple):
            mid = mid, None
        mid = mid[:2]
        if len(mid) == 1:
            mid += (None, )
        cid, lid = mid
        cid = fullmatch(cid, r'\d+')
        lid = fullmatch(lid, r'\d+')
        assert cid
        return cid, lid

    def prepare_mid(self):
        '''Return the ids matched in the URL, or None if no course id is found.'''
        mid = matchm(self.url, r'/course/id_(\d+)/lesson_(\d+)',
                               r'/course/id_(\d+)')
        if mid[0]:
            return mid

    def list_only(self):
        '''Return True when the media id carries no lesson id.'''
        return not self.mid[1]

    def prepare(self):
        '''Fetch the course/lesson page and resolve its video segments.

        Raises AssertionError when the page contains no ``playVideo("<vid>")``.
        '''
        info = MediaInfo(self.name)

        cid, lid = self.mid
        if lid is None:
            url = 'https://open.sina.com.cn/course/id_{cid}/'
        else:
            url = 'https://open.sina.com.cn/course/id_{cid}/lesson_{lid}/'
        html = get_content(url.format(**vars()))
        vid = match1(html, r'playVideo\("(\d+)')
        info.artist = match1(html, '讲师：(.+?)<br/>')

        assert vid, "can't find vid!"

        rand = str(random.random())[:18]
        data = get_response('http://ask.ivideo.sina.com.cn/v_play.php',
                            params={
                               'vid': vid,
                               'ran': rand,
                               'p': 'i',
                               'k': get_k(vid, rand),
                            }).xml()['root']

        info.title = data['vname']
        urls = []
        size = 0
        # Each 'durl' entry is one segment; sizes are summed for the total.
        for durl in data['durl']:
            urls.append(durl['url'])
            size += durl['filesize']

        info.streams['current'] = {
            'container': 'hlv',
            'profile': 'current',
            'src' : urls,
            'size': size
        }
        return info

    def prepare_list(self):
        '''Yield ``(course_id, lesson_id)`` for every lesson on the course page.

        The first yielded entry has a lesson id of None.
        '''
        cid, lid = self.mid
        url = 'https://open.sina.com.cn/course/id_{cid}/'
        html = get_content(url.format(**vars()))
        lids = [None] + matchall(html, r'/lesson_(\d+)/">')
        self.set_index(lid, lids)
        for lesson_id in lids:
            yield cid, lesson_id

site = OpenC()
85 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sina/video.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | def get_realurl(url, vid):
 7 |     params = urlencode({'vid': vid})
 8 |     url = '{url}?{params}'.format(**vars())
 9 |     if get_location(url) != url:
10 |         return url  # redirect url will be expired, keep origin
11 |     html = get_content(url)
12 |     print(html)
13 |     return matchall(html, 'CDATA\[([^\]]+)')[1]
14 | 
class Sina(Extractor):
    '''Extractor for Sina videos served through the ``h5play`` API.'''

    name = '新浪视频 (sina)'

    def prepare_mid(self):
        '''Return the video id from the URL, falling back to the page source.'''
        mid = match1(self.url, r'video_id=(\d+)', r'(\d{5,})\.swf')
        if mid:
            return mid
        html = get_content(self.url)
        return match1(html, r'''[vV]ideo_?[iI]d['"]?\s*[:=]\s*['"]?(\d+)''')

    def prepare(self):
        '''Query the h5play API and add one stream per available profile.

        Raises AssertionError when the API returns a code other than 1 or
        when none of the known containers is present in the response.
        '''
        info = MediaInfo(self.name)

        data = get_response('https://s.video.sina.com.cn/video/h5play',
                            params={'video_id': self.mid}).json()
        assert data['code'] == 1, data['message']
        data = data['data']

        info.title = data['title']
        info.duration = int(data['length']) // 1000

        # Use the first container, in order of preference, that has entries.
        video_info = None
        for t in ['mp4', 'flv', 'hlv', '3gp']:
            video_info = data['videos'].get(t)
            if video_info:
                break
        assert video_info, 'can not find any video!!!'

        for profile in video_info:
            v = video_info[profile]
            url = get_realurl(v['file_api'], v['file_id'])
            info.streams[profile] = {
                'container': v['type'],
                'profile': profile,
                'src': [url]
            }

        return info

    def prepare_list(self):
        '''Return every ``video_id: <value>`` found on the page.'''
        html = get_content(self.url)
        return matchall(html, 'video_id: ([^,]+)')

site = Sina()
57 | 


--------------------------------------------------------------------------------
/ykdl/extractors/singlemultimedia.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
  6 | # TODO: add more supported types and move to ykdl.util
  7 | # REF: https://www.iana.org/assignments/media-types/media-types.xhtml
  8 | 
  9 | contentTypes = {
 10 |     'audio/basic': 'au',
 11 |     'audio/mpeg': 'mp3',
 12 |     'audio/x-aiff': 'aif',
 13 |     'audio/x-pn-realaudio': 'ra',
 14 |     'audio/x-wav': 'wav',
 15 |     'video/3gpp': '3gp',
 16 |     'video/3gpp2': '3p2',
 17 |     'video/avi': 'avi',
 18 |     'video/mp2t': 'ts',
 19 |     'video/mp4': 'mp4',
 20 |     'video/mpeg': 'mp2v',
 21 |     'video/mpeg4': 'mp4',
 22 |     'video/mpg': 'mpg',
 23 |     'video/ogg': 'ogg',
 24 |     'video/quicktime': 'mov',
 25 |     'video/vnd.mpegurl': 'mxu',
 26 |     'video/vnd.ms-playready.media.pyv': 'pyv',
 27 |     'video/vnd.rn-realvideo': 'rv',
 28 |     'video/vnd.uvvu.mp4': 'uvu',
 29 |     'video/vnd.vivo': 'viv',
 30 |     'video/webm': 'webm',
 31 |     'video/x-f4v': 'f4v',
 32 |     'video/x-fli': 'fli',
 33 |     'video/x-flv': 'flv',
 34 |     'video/x-ivf': 'IVF',
 35 |     'video/x-sgi-movie': 'movie',
 36 |     'video/x-m4v': 'm4v',
 37 |     'video/x-mpeg': 'mpe',
 38 |     'video/x-mpg': 'mpa',
 39 |     'video/x-msvideo': 'avi',
 40 |     'video/x-ms-asf': 'asf',
 41 |     'video/x-ms-wm': 'wm',
 42 |     'video/x-ms-wmv': 'wmv',
 43 |     'video/x-ms-wmx': 'wmx',
 44 |     'video/x-ms-wvx': 'wvx',
 45 |     'application/x-mpegurl': 'm3u8',
 46 |     'application/vnd.apple.mpegurl': 'm3u8',
 47 |     'application/vnd.rn-realmedia': 'rm',
 48 |     'application/vnd.rn-realmedia-secure': 'rms',
 49 |     'application/vnd.rn-realmedia-vbr': 'rmvb',
 50 | }
 51 | 
 52 | extNames = {
 53 |     # video
 54 |     'm2ts',  'mts',  'm2t', 'ts',   'mkv',  'avi',  # contain
 55 |     'mpeg',  'mpg',  'm1v', 'mpv',  'dat',          # MPEG-1
 56 |     'mpeg2', 'mpg2', 'm2v', 'mpv2', 'mp2v', 'vob',  # MPEG-2
 57 |     'mpeg4', 'mpg4', 'm4v', 'mp4',  'mp4v',         # H.264/MPEG-4 AVC
 58 |     'flv',   'f4v',                   # Flash Video # H.264/MPEG-4 AVC
 59 |     '3gpp',  '3gp2', '3gp', '3g2',                  # H.264/MPEG-4 AVC
 60 |     'h264',  'x264', '264', 'avc',                  # H.264/MPEG-4 AVC
 61 |     'h265',  'x265', '265', 'hevc',                 # H.265/HEVC
 62 |     'webm',                                         # WebM
 63 |     'ogv',                                          # Ogg Media File
 64 |     'rm',  'rmvb',                                  # Real Video
 65 |     'mov', 'hdmov', 'qt',                           # QuickTime 
 66 |     'asf', 'wmv',   'wm',                           # Windows Media Video
 67 |     # audio
 68 |     'mpa',  'mp1', 'm1a', 'mp2', 'm2a', 'mp3', 'm4a',
 69 |     'weba', 'f4a', 'ra',  'ogg', 'oga', 'wav', 'wma',
 70 |     'flac', 'ape', 'mka', 'dts', 'aac', 'ac3', 'opus'
 71 |     # picture
 72 |     'jpeg', 'jpe', 'jpg', 'jpc', 'jp2', 'j2k',
 73 |     'tiff', 'bmp', 'png', 'gif', 'jbg', 'webp',
 74 |     # HLS
 75 |     'm3u',
 76 |     *contentTypes.values()
 77 | }
 78 | 
 79 | class Multimedia(Extractor):
 80 |     name = 'Multimedia (多媒体文件)'
 81 | 
 82 |     def prepare(self):
 83 |         resinfo = get_head_response(url).info()
 84 |         # Get file type
 85 |         ext = self.url.split('?')[0].split('.')[-1]
 86 |         if ext not in extNames:
 87 |             ctype = resinfo.get('Content-Type', '').lower()
 88 |             if ctype.startswith('image/'):
 89 |                 ext = ctype[6:]
 90 |             else:
 91 |                 ext = contentTypes.get(ctype)
 92 |         assert ext in extNames, 'This link is not a Multimedia file!'
 93 | 
 94 |         # Get title
 95 |         title = resinfo.get_filename()
 96 |         if title is None:
 97 |             title = self.url.split('?')[0].split('/')[-1]
 98 |         if title.endswith('.' + ext):
 99 |             title = title[0 : -len(ext) - 1]
100 | 
101 |         info = MediaInfo(self.name)
102 |         info.title = title
103 |         if ext[:3] == 'm3u':
104 |             info.streams = load_m3u8_playlist(self.url)
105 |         else:
106 |             info.streams['current'] = {
107 |                 'container': ext,
108 |                 'profile': 'current',
109 |                 'src': [self.url],
110 |                 'size': int(resinfo.get('Content-Length', 0))
111 |             }
112 |         return info
113 | 
114 | site = Multimedia()
115 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sohu/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | 
 5 | 
 6 | def get_extractor(url):
 7 |     path_b64 = match1(url, 'tv.sohu.com/v/(\w+=*)')
 8 |     if path_b64:
 9 |         path = unb64(path_b64, urlsafe=True)
10 |         if fullmatch(path, '[a-z]{2}/\d+/\d+\.shtml'):
11 |             url = 'https://my.tv.sohu.com/' + path
12 | 
13 |     if 'my.tv.sohu.com' in url:
14 |         from . import my as s
15 |         return s.site, url
16 |     else:
17 |         from . import tv as s
18 |         return s.site, url
19 | 
20 |     raise NotImplementedError(url)
21 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sohu/my.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .sohubase import SohuBase
 4 | 
 5 | 
 6 | class MySohu(SohuBase):
 7 |     name = '搜狐自媒体 (MySohu)'
 8 | 
 9 |     apiurl = 'http://my.tv.sohu.com/play/videonew.do'
10 |     apiparams = {
11 |         'vid': '',
12 |         'referer': 'http://my.tv.sohu.com/'
13 |     }
14 | 
15 | site = MySohu()
16 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sohu/sohubase.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from .._common import *
  4 | 
  5 | 
  6 | class SohuBase(Extractor):
  7 | 
  8 |     supported_stream_types = [
  9 |         #'h2654kVid',
 10 |         #'h2654mVid',
 11 |         #'h265oriVid',
 12 |         #'h265superVid',
 13 |         #'h265highVid',
 14 |         #'h265norVid',
 15 |         'h2644kVid',
 16 |         'oriVid',
 17 |         'superVid',
 18 |         'highVid',
 19 |         'norVid'
 20 |     ]
 21 |     types_2_id = {
 22 |         'h2654kVid': '4K',
 23 |         'h2654mVid': '4K',
 24 |         'h2644kVid': '4K',
 25 |         'h265oriVid': 'BD',
 26 |         'h265superVid': 'TD',
 27 |         'h265highVid': 'HD',
 28 |         'h265norVid': 'SD',
 29 |         'oriVid': 'BD',
 30 |         'superVid': 'TD',
 31 |         'highVid': 'HD',
 32 |         'norVid': 'SD'
 33 |     }
 34 |     id_2_profile = {
 35 |         '4K': '4K',
 36 |         'BD': '原画',
 37 |         'TD': '超清',
 38 |         'HD': '高清',
 39 |         'SD': '标清'
 40 |     }
 41 | 
 42 |     def parser_info(self, info, data, stream, lvid, uid):
 43 |         if not 'allot' in data or lvid != data['id']:
 44 |             return
 45 |         stream_id = self.types_2_id[stream]
 46 |         stream_profile = self.id_2_profile[stream_id]
 47 |         host = data['allot']
 48 |         data = data['data']
 49 |         size = sum(map(int, data['clipsBytes']))
 50 |         urls = []
 51 |         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
 52 |         for new, ck, in zip(data['su'], data['ck']):
 53 |             if urlparse(new).netloc == '':
 54 |                 url = get_response('https://{host}/ip'.format(**vars()),
 55 |                                    params={
 56 |                                        'ch': data['ch'],
 57 |                                        'num': data['num'],
 58 |                                        'new': new,
 59 |                                        'key': ck,
 60 |                                        'uid': uid,
 61 |                                        'prod': 'h5n',
 62 |                                        'pt': 1,
 63 |                                        'pg': 2,
 64 |                                    }).json()['servers'][0]['url']
 65 |             else:
 66 |                 url = new
 67 |             urls.append(url)
 68 |         info.streams[stream_id] = {
 69 |             'container': 'mp4',
 70 |             'profile': stream_profile,
 71 |             'src' : urls,
 72 |             'size': size
 73 |         }
 74 | 
 75 |     def fetch_info(self, vid):
 76 |         self.apiparams['vid'] = vid
 77 |         return get_response(self.apiurl, params=self.apiparams).json()
 78 | 
 79 |     def prepare_mid(self):
 80 |         mid = match1(self.url, '\d/(\d+)\.s?html',
 81 |                               r'\b[bv]?id=(\d+)',
 82 |                                'share_play.html#(\d+)_')
 83 |         if mid is None:
 84 |             html = get_content(self.url)
 85 |             mid = match1(html, r'\b[bv]id\s*[=:]\s*["\']?(\d+)',
 86 |                                r'(?:&|\x26)[bv]?id=(\d+)'
 87 |                                 '/(\d+)/v\.swf')
 88 |         return mid
 89 | 
 90 |     def prepare(self):
 91 |         info = MediaInfo(self.name)
 92 |         # this is needless now, uid well be registered in the the following code
 93 |         #info.extra['header'] = 'Range: '
 94 | 
 95 |         data = self.fetch_info(self.mid)
 96 |         assert data['status'] == 1, data
 97 | 
 98 |         # report
 99 |         now = time.time()
100 |         uid = int(now * 1000)
101 |         get_response('http://z.m.tv.sohu.com/h5_cc.gif',
102 |                      params={
103 |                          'vid': self.mid,
104 |                          'url': self.url,
105 |                          'refer': self.url,
106 |                          't': int(now),
107 |                          'uid': uid,
108 |                          #'nid': nid,
109 |                          #'pid': pid,
110 |                          #'screen': '1366x768',
111 |                          #'channeled': channeled,
112 |                          #'MTV_SRC': MTV_SRC,
113 |                          #'position': 'page_adbanner',
114 |                          #'op': 'click',
115 |                          #'details': '{}',
116 |                          #'os': 'linux',
117 |                          #'platform': 'linux',
118 |                          #'passport': '',
119 |                      })
120 | 
121 |         _data = data['data']
122 |         info.title = _data['tvName']
123 |         for stream in self.supported_stream_types:
124 |             lvid = _data.get(stream)
125 |             if lvid == 0 or not lvid:
126 |                 continue
127 |             if lvid != self.mid:
128 |                 data = self.fetch_info(lvid)
129 |             self.parser_info(info, data, stream, lvid, uid)
130 | 
131 |         return info
132 | 


--------------------------------------------------------------------------------
/ykdl/extractors/sohu/tv.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .._common import *
 4 | from .sohubase import SohuBase
 5 | 
 6 | 
 7 | class TvSohu(SohuBase):
 8 |     name = '搜狐视频 (TvSohu)'
 9 | 
10 |     apiurl = 'http://hot.vrs.sohu.com/vrs_flash.action'
11 |     apiparams = {'vid': ''}
12 | 
13 |     def list_only(self):
14 |         return bool(match(self.url, 'tv.sohu.com/s\d{4}/[a-z]'))
15 | 
16 |     def prepare_list(self):
17 |         html = get_content(self.url)
18 |         plid = match1(html, r'\bplaylistId\s*=\s*["\']?(\d+)')
19 |         data = get_response('https://pl.hd.sohu.com/videolist',
20 |                             params={
21 |                                 'playlistid': plid,
22 |                                 'order': 0,
23 |                                 'cnt': 1,
24 |                                 'withLookPoint': 1,
25 |                                 'preVideoRule': 3,
26 |                                 'ssl': 1,
27 |                                 'callback': '__get_videolist',
28 |                                 '_': int(time.time() * 1000)
29 |                             }).json()
30 |         mids = [str(v['vid']) for v in data['videos']]
31 |         mid = not self.list_only and self.mid or None
32 |         self.set_index(mid, mids)
33 |         return mids
34 | 
35 | site = TvSohu()
36 | 


--------------------------------------------------------------------------------
/ykdl/extractors/tudou.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | from .youku import Youku
 5 | 
 6 | class Tudou(Youku):
 7 |     name = 'Tudou (土豆)'
 8 | 
 9 |     def prepare(self):
10 |         if match1(self.url, '(new-play|video)\.tudou\.com/') is None:
11 |             self.url = get_location(self.url)
12 |         return Youku.prepare(self)
13 | 
14 | site = Tudou()
15 | 


--------------------------------------------------------------------------------
/ykdl/extractors/weibo.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from ._common import *
  4 | 
  5 | 
  6 | class Weibo(Extractor):
  7 |     name = '微博 (Weibo)'
  8 | 
  9 |     quality_2_id = {
 10 |            '4': '4K',
 11 |            '2': '2K',
 12 |         '1080': 'BD',
 13 |          '720': 'TD',
 14 |          '480': 'HD',
 15 |          '360': 'SD'
 16 |     }
 17 | 
 18 |     def prepare_mid(self):
 19 |         patterns = '(?:object|f)_?id"?\s*[=:]\s*"?(\d{4}:(?:\d{16}|\w{32}))\W', \
 20 |                    'media_id=(\d{16}|\w{32})'
 21 |         mid = match1(self.url, '\D(\d{4}:(?:\d{16}|\w{32}))(?:\W|$)',
 22 |                                *patterns)
 23 |         if mid:
 24 |             return mid
 25 | 
 26 |         rurl = get_location(self.url)
 27 |         page = match1(rurl, 'https?://[^/]+(/\d+/\w+)')
 28 |         if page is None or match1(page, '/(\d+)$'):
 29 |             html = get_content(rurl.replace('//weibo.', '//hk.weibo.')
 30 |                                    .replace('/user/', '/'))
 31 |             mid = match1(html, *patterns)
 32 |             if mid:
 33 |                 return mid
 34 |             page = match1(html, '"og:url".+weibo.com(/\d+/\w+)')
 35 | 
 36 |         assert page, 'can not find any video!!!'
 37 |         self.url = 'https://weibo.com' + page
 38 | 
 39 |         html = get_content(self.url)
 40 |         return match1(html, *patterns)
 41 | 
 42 |     def prepare(self):
 43 |         if 'passport.weibo' in self.url:
 44 |             url = parse_qs(self.url.split('?', 1)[-1]).get('url')
 45 |             assert url, 'lost the url param in a link of "passport.weibo"'
 46 |             self.url = url[0]
 47 | 
 48 |         info = MediaInfo(self.name)
 49 |         add_header('User-Agent', 'Baiduspider')
 50 | 
 51 |         if '.weibocdn.com' not in self.url:
 52 |             rurl = get_location(self.url)
 53 |             assert '/sorry?' not in rurl, 'can not find any video!!!'
 54 | 
 55 |         def append_stream(stream_profile, stream_quality, url):
 56 |             stream_id = self.quality_2_id[stream_quality]
 57 |             info.streams[stream_id] = {
 58 |                 'container': 'mp4',
 59 |                 'profile': stream_profile,
 60 |                 'src': [url]
 61 |             }
 62 | 
 63 |         try:
 64 |             self.mid
 65 |         except AssertionError:
 66 |             html = get_content(self.url)
 67 |             streams = match1(html, 'quality_label_list=([^"]+)').split('&')[0]
 68 |             if streams:
 69 |                 streams = json.loads(unquote(streams))
 70 |                 for stream in streams:
 71 |                     stream_quality = stream['quality_label'].upper()
 72 |                     stream_profile = stream['quality_desc'] + ' ' + stream_quality
 73 |                     stream_quality = match1(stream_quality, '(\d+)')
 74 |                     append_stream(stream_profile, stream_quality, stream['url'])
 75 |             else:
 76 |                 url = match1(html, 'action-data="[^"]+?&video_src=([^"&]+)')
 77 |                 if url:
 78 |                     info.streams['current'] = {
 79 |                         'container': 'mp4',
 80 |                         'profile': 'current',
 81 |                         'src': [unquote(url)]
 82 |                     }
 83 |             if info.streams:
 84 |                 info.title = match1(html, '<meta content="([^"]+)" name="description"').split('\n')[0]
 85 |                 info.artist = match1(html, '<meta content="([^"]+)" name="keywords"').split(',')[0]
 86 |                 i = info.title.find('】') + 1
 87 |                 if i:
 88 |                     info.title = info.title[:i]
 89 |                 return info
 90 | 
 91 |         if ':' not in self.mid:
 92 |             self.mid = '1034:' + self.mid  # oid, the prefix is not necessary and would not be checked
 93 |         vdata = get_response('https://weibo.com/tv/api/component',
 94 |                     headers={
 95 |                         'Referer': 'https://weibo.com/tv/show/' + self.mid
 96 |                     },
 97 |                     data={
 98 |                         'data': json.dumps({
 99 |                             'Component_Play_Playinfo': {'oid': self.mid}
100 |                         })
101 |                     }).json()['data']['Component_Play_Playinfo']
102 | 
103 |         for stream_profile, url in vdata['urls'].items():
104 |             if url:
105 |                 stream_quality = match1(stream_profile, '(\d+)')
106 |                 append_stream(stream_profile, stream_quality, 'https:' + url)
107 | 
108 |         info.title = vdata['title']
109 |         info.artist = vdata['author']
110 |         info.duration = vdata['duration']
111 |         info.add_comment(vdata['text'])
112 |         return info
113 | 
114 | site = Weibo()
115 | 


--------------------------------------------------------------------------------
/ykdl/extractors/yinyuetai.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class YinYueTai(Extractor):
 7 |     name = '音悦台 (YinYueTai)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(self.url,'\Wid=(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 |         info.extra.referer = 'https://www.yinyuetai.com/'
15 | 
16 |         data = get_response('https://data.yinyuetai.com/video/getVideoInfo',
17 |                             params={'id': self.mid}).json()
18 |         assert not data['delFlag'], 'MTV has been deleted!'
19 | 
20 |         info.title = data['videoName']
21 |         info.artist = data['artistName']
22 | 
23 |         url = data['videoUrl']
24 |         info.streams['current'] = {
25 |             'container': url_info(url)[1],
26 |             'profile': 'current',
27 |             'src': [url]
28 |         }
29 | 
30 |         return info
31 | 
32 | site = YinYueTai()
33 | 


--------------------------------------------------------------------------------
/ykdl/extractors/yizhibo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Yizhibo(Extractor):
 7 |     name = 'Yizhibo (一直播)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return self.url[self.url.rfind('/')+1:].split('.')[0]
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name)
14 |         info.live = True
15 | 
16 |         data = get_response(
17 |                     'http://www.yizhibo.com/live/h5api/get_basic_live_info',
18 |                     params={'scid': self.mid}).json()
19 |         assert content['result'] == 1, 'Error : ' + data['result']
20 |         data = data['data']
21 | 
22 |         info.title = data['live_title']
23 |         info.artist = data['nickname']
24 |         info.streams['current'] = {
25 |             'container': 'm3u8',
26 |             'profile': 'current',
27 |             'src' : [data['play_url']],
28 |             'size': Infinity
29 |         }
30 |         return info
31 | 
32 | site = Yizhibo()
33 | 


--------------------------------------------------------------------------------
/ykdl/extractors/zhangyu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class ZYLive(SimpleExtractor):
 7 |     name = 'ZhangYu Live (章鱼直播)'
 8 | 
 9 |     def init(self):
10 |         self.headers['User-Agent'] = (
11 |                     'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) '
12 |                     'AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 '
13 |                     'Mobile/9A334 Safari/7534.48.3')
14 |         self.live = True
15 |         self.title_pattern = '<title>([^<]+)'
16 |         self.url_pattern = "<video _src='([^']+)"
17 |         self.artist_pattern = 'videoTitle = "([^"]+)'
18 | 
19 | site = ZYLive()
20 | 


--------------------------------------------------------------------------------
/ykdl/extractors/zhanqi.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class Zhanqi(Extractor):
 7 |     # live is down, all are playback
 8 |     name = '战旗 (zhanqi)'
 9 | 
10 |     def prepare(self):
11 |         info = MediaInfo(self.name)
12 |         install_cookie()
13 | 
14 |         html = get_content(self.url)
15 |         data = json.loads(match1(html, 'oPageConfig.oVideo = ({.+?});',
16 |                                        'oPageConfig.oRoom = ({.+?});'))
17 |         info.title = data['title']
18 |         info.artist = data['nickname']
19 |         if data.get('protocol') == 'hls':
20 |             info.streams = load_m3u8_playlist(data['playUrl'])
21 |             return info
22 | 
23 |         vid = data['videoId']
24 |         gid = get_response('https://www.zhanqi.tv/api/public/room.viewer',
25 |                            params={'uid': data['uid']}
26 |                            ).json()['data']['gid']
27 |         chain_key = get_response(
28 |                         'https://www.zhanqi.tv/api/public/burglar/chain',
29 |                         data={
30 |                             'stream': vid + '.flv',
31 |                             'cdnKey': 202,
32 |                             'platform': 128
33 |                         }).json()['data']['key']
34 |         pn = str(int(time.time() * 1e6))[-11:]
35 |         cdn_host = random.choice(get_response(
36 |                         'https://umc.danuoyi.alicdn.com/dns_resolve_https',
37 |                         params={
38 |                             'app': 'zqlive',
39 |                             'host_key': 'alhdl-cdn.zhanqi.tv',
40 |                             'stream': vid,
41 |                             'playNum': pn,
42 |                             'protocol': 'hdl',
43 |                             #'client_ip': '',
44 |                             'gId': gid,
45 |                             'platform': 128
46 |                         }).json()['redirect_domain'])
47 | 
48 |         # valid stream suffix: 1080p 720p 408p 360p
49 |         url = ('https://{cdn_host}/alhdl-cdn.zhanqi.tv/zqlive/'
50 |                '{vid}.flv?{chain_key}&'.format(**vars())
51 |               + urlencode({
52 |                     'playNum': '{pn}',
53 |                     'gId': gid,
54 |                     'ipFrom': 1,
55 |                     'clientIp': '',
56 |                     'fhost': 'h5',
57 |                     'platform': 128
58 |                 }))
59 |         info.streams['current'] = {
60 |             'container': 'flv',
61 |             'profile': 'current',
62 |             'src': [url]
63 |         }
64 |         return info
65 | 
66 | site = Zhanqi()
67 | 


--------------------------------------------------------------------------------
/ykdl/extractors/zhuafan.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from ._common import *
 4 | 
 5 | 
 6 | class JustFunLive(Extractor):
 7 |     name = '抓饭直播 (JustFun Live)'
 8 | 
 9 |     def prepare_mid(self):
10 |         return match1(self.url, 'live/(\d+)')
11 | 
12 |     def prepare(self):
13 |         info = MediaInfo(self.name, True)
14 | 
15 |         try:
16 |             data = get_response(
17 |                     'https://www.zhuafan.tech/live-channel-info/channel/v2/info',
18 |                     params={
19 |                         'cid': self.mid,
20 |                         'decrypt': 1
21 |                     }).json()
22 |         except:
23 |             html = get_content(self.url)
24 |             data = match1(html, 'window\.__INITIAL_STATE__ = ({.+})</script>')
25 |             self.logger.debug('data:\n%s', data)
26 |             data = json.loads(data)['channel']
27 | 
28 |         assert data['playStatusCode'] == 0, data['playStatusCodeDesc']
29 | 
30 |         info.artist = data['uname']
31 |         info.title = data['cname']
32 | 
33 |         info.streams['OG-FLV'] = {
34 |             'container': 'flv',
35 |             'profile': 'current',
36 |             'src' : [data['httpsPlayInfo']],
37 |             'size': Infinity
38 |         }
39 |         info.streams['OG-HLS'] = {
40 |             'container': 'm3u8',
41 |             'profile': 'current',
42 |             'src' : [data['hlsPlayInfo']],
43 |             'size': Infinity
44 |         }
45 | 
46 |         return info
47 | 
48 | site = JustFunLive()
49 | 


--------------------------------------------------------------------------------
/ykdl/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LifeActor/ykdl/44cba0d088787a4c9aed62bba1ff84c7e4066c15/ykdl/util/__init__.py


--------------------------------------------------------------------------------
/ykdl/util/fs.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import sys
  4 | import platform
  5 | from .wrap import hash
  6 | 
  7 | 
  8 | if sys.platform.startswith(('msys', 'cygwin')):
  9 |     system = 'Windows'
 10 | else:
 11 |     system = platform.system()
 12 | 
 13 | translate_table = None
 14 | translate_table_cs = None
 15 | 
 16 | def _ensure_translate_table():
 17 |     global translate_table, translate_table_cs
 18 |     if translate_table is None:
 19 |         ### Visible ###
 20 |         # Control characters
 21 |         # Delete them except Tab and Newline
 22 |         translate_table = dict.fromkeys((*range(0x20), *range(0x7F, 0xA0)))
 23 |         translate_table.update({
 24 |             ord('\t'): ' ',
 25 |             ord('\n'): '-',
 26 |         })
 27 | 
 28 |         # Unicode Category Separator characters
 29 |         # Convert to Space
 30 |         translate_table.update(dict.fromkeys((
 31 |             # Generate:
 32 |             #   import sys
 33 |             #   from unicodedata import category 
 34 |             #   ', '.join((f'0x{u:X}'
 35 |             #              for u in range(0x20, sys.maxunicode)
 36 |             #              if category(chr(u))[0] == 'Z'))
 37 |             0x20, 0xA0,
 38 |             0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
 39 |             0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
 40 |             0x2028, 0x2029, 0x202F, 0x205F, 0x3000
 41 |         ), ' '))
 42 | 
 43 |         translate_table_cs = translate_table.copy()
 44 | 
 45 |         ### Legality ###
 46 |         translate_table.update({
 47 |             ord('/'): '／',  # File path component separator
 48 |         })
 49 |         if system == 'Windows':
 50 |             # FAT12 / FAT16 / FAT32 (VFAT LFNs)
 51 |             # exFAT
 52 |             # NTFS / ReFS (Win32 namespace)
 53 |             translate_table.update({
 54 |                 ord('\\'): '＼',
 55 |                 ord(':'): '꞉',
 56 |                 ord('*'): '∗',
 57 |                 ord('?'): '‽',
 58 |                 ord('"'): '″',
 59 |                 ord('<'): '＜',
 60 |                 ord('>'): '＞',
 61 |                 ord('|'): '¦',
 62 |             })
 63 |         elif system == 'Darwin':
 64 |             # HFS+ except longstanding cases
 65 |             if int(platform.release().split('.')[0]) < 17:
 66 |                 translate_table.update({
 67 |                     ord(':'): '꞉',
 68 |                 })
 69 | 
 70 | def legitimize(text, compress='', strip='', trim=82):
 71 |     '''Converts a string to a valid filename.
 72 |     Also see `help(compress_strip)`.
 73 |     '''
 74 |     _ensure_translate_table()
 75 |     text = text.translate(translate_table)
 76 |     text = compress_strip(text, compress, strip, True)
 77 | 
 78 |     assert text, 'the given filename could not be legalized!'
 79 | 
 80 |     result = text[:trim]
 81 |     overflow = text[trim:]
 82 |     if overflow:
 83 |         crc = hash.crc32(overflow)
 84 |         result += '_{crc}'.format(**vars())
 85 |     return result
 86 | 
 87 | def compress_strip(text, compress='', strip='', translated=False):
 88 |     '''Compress same characters, and then strip.
 89 |     Dot, Minus, Underline and whole characters of Unicode Category Separator
 90 |     will always be compressed and stripped.
 91 |     '''
 92 |     if not translated:
 93 |         _ensure_translate_table()
 94 |         text = text.translate(translate_table_cs)
 95 | 
 96 |     compress = set(c for c in compress + '.-_ ')
 97 |     chars = []
 98 |     last_char = None
 99 |     for char in text:
100 |         if not (char is last_char and char in compress):
101 |             chars.append(char)
102 |         last_char = char
103 |     return ''.join(chars).strip(strip + '.-_ ')
104 | 


--------------------------------------------------------------------------------
/ykdl/util/kt_player.py:
--------------------------------------------------------------------------------
 1 | '''Parse & decrypto license code for KVS Player.'''
 2 | # https://www.kernel-scripts.com/en/documentation/player/
 3 | 
 4 | from .match import *
 5 | from ..mediainfo import MediaStreams
 6 | 
 7 | import time
 8 | 
 9 | 
__all__ = ['get_kt_playlist', 'get_kt_media']

# Maps a video height label (as built in get_kt_playlist(), e.g. '720P') to
# the short stream id used as the key of the returned streams.
profile_2_id = {
    '2160P': '2K',
    '1080P': 'BD',
     '720P': 'TD',
     '480P': 'HD',
     '360P': 'SD',
     '240P': 'LD'
}
20 | 
def get_license(html):
    '''Derive the index swap list from the player's `license_code`.

    Params:
        `html`, page source which contains `license_code: '$<15 digits>'`.

    Returns a list of `(i, j)` index pairs in the order `decrypto()` has to
    apply them, or None when no license code is found.
    '''
    # Raw string: `\$` and `\d` are regex escapes, not string escapes.
    license = match1(html, r'''license_code: ['"]\$(\d{15})['"]''')
    if license is None:
        return None
    mod = license.replace('0', '1')
    mod = str(4 * abs(int(mod[:8]) - int(mod[-8:])))
    nlicense = []
    plicense = []
    # NOTE(review): `mod[o]` needs `mod` to have at least 8 digits, a shorter
    # value raises IndexError -- confirm this cannot happen with real codes.
    for o in range(8):
        for i in range(4):
            nlicense.append((int(license[o + i]) + int(mod[o])) % 10)
    # `n` is the sum of the digits which have not been consumed yet.
    n = sum(nlicense)
    for i, l in enumerate(nlicense):
        plicense.append((i, (n + i) % 32))
        n -= l
    plicense.reverse()
    return plicense
38 | 
def decrypto(url, license):
    '''Restore the scrambled 42 hex characters path component of `url`.

    Params:
        `url`, a video URL which contains `/<42 hex chars>/`.
        `license`, the `(i, j)` index pairs returned by `get_license()`.

    Returns the URL with the hash component rearranged.
    '''
    # Raw string: `\d` is a regex escape, not a string escape.
    l1 = match1(url, r'/([\da-f]{42})/')
    l2 = list(l1)
    for i, l in license:
        l2[i], l2[l] = l2[l], l2[i]
    l2 = ''.join(l2)
    return url.replace(l1, l2)
46 | 
def get_urls(html):
    '''Yield the decrypted video URLs found in `html`.

    Every URL gets a `rnd` query parameter, a millisecond timestamp taken
    10 seconds in the past. Yields nothing when the page has no license code.
    '''
    rnd = int((time.time() - 10) * 1e3)
    license = get_license(html)
    if license is None:
        return
    for raw_url in matchall(html, '''function/0/(http[^'"]+)/?['"]'''):
        url = decrypto(raw_url, license)
        # Append to an existing query string or start a new one.
        sep = '&' if '?' in url else '?'
        yield f'{url}{sep}rnd={rnd}'
58 | 
def get_kt_playlist(html):
    '''Collect all video URLs of a KVS Player page into a MediaStreams.

    The profile is read from a `_<height>p.mp4` / `_<height>.mp4` part of
    the URL, '480P' is assumed when there is none.

    Raises KeyError when the height is not listed in `profile_2_id`.
    '''
    streams = MediaStreams()
    for url in get_urls(html):
        # Raw string: `\d` is a regex escape, not a string escape.
        stream_profile = (match1(url, r'_(\d+)p?.mp4') or '480') + 'P'
        stream = profile_2_id[stream_profile]
        streams[stream] = {
            'container': 'mp4',
            'profile': stream_profile,
            'src': [url]
        }
    return streams
70 | 
def get_kt_media(html):
    '''Return the `src` list of item 0 of `get_kt_playlist(html)`.'''
    # NOTE(review): relies on MediaStreams supporting lookup by 0 -- confirm.
    playlist = get_kt_playlist(html)
    return playlist[0]['src']
73 | 


--------------------------------------------------------------------------------
/ykdl/util/log.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | 
 3 | IS_ANSI_TERMINAL = os.getenv('TERM', '').startswith((
 4 |     'eterm-color',
 5 |     'linux',
 6 |     'screen',
 7 |     'vt100',
 8 |     'xterm'
 9 | ))
10 | 
11 | if not IS_ANSI_TERMINAL and os.name == 'nt':
12 |     try:
13 |         import colorama
14 |     except ImportError:
15 |         pass
16 |     else:
17 |         colorama.init()
18 |         IS_ANSI_TERMINAL = True
19 | 
# ANSI escape code, SGR (Select Graphic Rendition) parameters.
# Pass any number of them to sprint() as `colors`.
# REF: https://en.wikipedia.org/wiki/ANSI_escape_code

# Effects
RESET = 0
BOLD = 1
UNDERLINE = 4
NEGATIVE = 7
# NOTE(review): many terminals render 21 as "doubly underlined" and use 22
# to switch bold off -- confirm this is the intended code.
NO_BOLD = 21
NO_UNDERLINE = 24
POSITIVE = 27

# Foreground colors
BLACK = 30
RED = 31
GREEN = 32
YELLOW = 33
BLUE = 34
MAGENTA = 35
CYAN = 36
LIGHT_GRAY = 37
DEFAULT = 39

# Background colors
BLACK_BACKGROUND = 40
RED_BACKGROUND = 41
GREEN_BACKGROUND = 42
YELLOW_BACKGROUND = 43
BLUE_BACKGROUND = 44
MAGENTA_BACKGROUND = 45
CYAN_BACKGROUND = 46
LIGHT_GRAY_BACKGROUND = 47
DEFAULT_BACKGROUND = 49

# Bright foreground colors (marked "xterm" by the original author)
DARK_GRAY = 90                 # xterm
LIGHT_RED = 91                 # xterm
LIGHT_GREEN = 92               # xterm
LIGHT_YELLOW = 93              # xterm
LIGHT_BLUE = 94                # xterm
LIGHT_MAGENTA = 95             # xterm
LIGHT_CYAN = 96                # xterm
WHITE = 97                     # xterm

# Bright background colors (marked "xterm" by the original author)
DARK_GRAY_BACKGROUND = 100     # xterm
LIGHT_RED_BACKGROUND = 101     # xterm
LIGHT_GREEN_BACKGROUND = 102   # xterm
LIGHT_YELLOW_BACKGROUND = 103  # xterm
LIGHT_BLUE_BACKGROUND = 104    # xterm
LIGHT_MAGENTA_BACKGROUND = 105 # xterm
LIGHT_CYAN_BACKGROUND = 106    # xterm
WHITE_BACKGROUND = 107         # xterm
63 | 
def sprint(text, *colors):
    '''Wrap `text` in an ANSI escape sequence built from `colors`.

    `colors` are SGR parameters, joined with ';'. The text is returned
    unchanged when no parameter is given or the terminal is not ANSI capable.
    '''
    if not (IS_ANSI_TERMINAL and colors):
        return text
    color = ';'.join(str(code) for code in colors)
    return f'\33[{color}m{text}\33[0m'
70 | 
71 | import logging
72 | 
# Maps a logging level to the SGR parameter(s) which ColorHandler stores in
# the record's `color` attribute; '31;1' combines RED with BOLD.
_LOG_COLOR_MAP_ = {
    logging.CRITICAL : '31;1',
    logging.ERROR    : RED,
    logging.WARNING  : YELLOW,
    logging.INFO     : LIGHT_GRAY,
    logging.DEBUG    : GREEN,
    logging.NOTSET   : DEFAULT
}

# Wraps the whole log line in the escape sequence of `%(color)s` and resets
# all attributes at the end of the line.
_colorFormatter = logging.Formatter('\33[%(color)sm%(levelname)s:%(name)s:%(message)s\33[0m')
83 | 
class ColorHandler(logging.StreamHandler):
    '''A StreamHandler which colors log lines by level on ANSI terminals.'''

    def __init__(self):
        super().__init__()
        if IS_ANSI_TERMINAL:
            self.formatter = _colorFormatter

    def format(self, recode):
        '''Set the `color` attribute of the record, then format it.

        A level without its own entry in `_LOG_COLOR_MAP_` (custom levels)
        uses the color of the nearest lower standard level instead of raising
        KeyError.
        '''
        if IS_ANSI_TERMINAL:
            color = _LOG_COLOR_MAP_.get(recode.levelno)
            if color is None:
                nearest = max((level for level in _LOG_COLOR_MAP_
                               if level <= recode.levelno),
                              default=logging.NOTSET)
                color = _LOG_COLOR_MAP_[nearest]
            recode.color = color
        return super().format(recode)
94 | 


--------------------------------------------------------------------------------
/ykdl/util/m3u8.py:
--------------------------------------------------------------------------------
  1 | from logging import getLogger
  2 | 
  3 | from ..mediainfo import MediaStreams
  4 | from .http import get_response
  5 | from .human import format_vps
  6 | 
  7 | logger = getLogger(__name__)
  8 | 
  9 | 
 10 | __all__ = ['live_m3u8', 'crypto_m3u8', 'load_m3u8_playlist', 'load_m3u8']
 11 | 
 12 | 
 13 | def live_error():
 14 |     raise NotImplementedError(
 15 |             'Internal live m3u8 parser and downloader had not '
 16 |             'be implementated! Please use FFmpeg instead.')
 17 | 
 18 | def load_live_m3u8(url):
 19 |     live_error()
 20 | 
 21 | def live_m3u8_lenth():
 22 |     live_error()
 23 | 
 24 | import m3u8
 25 | from urllib.parse import urljoin
 26 | 
 27 | class HTTPClient():
 28 |     hkwargs = {}
 29 |     def download(self, uri, timeout=None, headers={}, *args, **kwargs):
 30 |         # live is disabled, results can be cached safely
 31 |         response = get_response(uri, headers, cache=True, **self.hkwargs)
 32 |         return response.text, urljoin(response.url, '.')
 33 | 
 34 | def _load(uri, **kwargs):
 35 |     '''Support keyword arguments from m3u8.load().
 36 |     Argument "hkwargs" pass on a keyword arguments dict to .http.get_response().
 37 |     '''
 38 |     http_client = kwargs.get('http_client') or HTTPClient
 39 |     hkwargs = kwargs.pop('hkwargs', None)
 40 |     if isinstance(http_client, type):
 41 |         http_client = http_client()
 42 |     if isinstance(http_client, HTTPClient) and hkwargs:
 43 |         headers = hkwargs.pop('headers', None)
 44 |         if headers:
 45 |             if 'headers' in kwargs:
 46 |                 kwargs['headers'].update(headers)
 47 |             else:
 48 |                 kwargs['headers'] = headers
 49 |         http_client.hkwargs = hkwargs
 50 |     kwargs['http_client'] = http_client
 51 |     return m3u8.load(uri, **kwargs)
 52 | 
 53 | def live_m3u8(url, **kwargs):
 54 |     '''Params: as same as _load().'''
 55 |     m = _load(url, **kwargs)
 56 |     ll = m.playlists or m.iframe_playlists
 57 |     if ll:
 58 |         m = _load(ll[0].absolute_uri, **kwargs)
 59 |     return not (m.is_endlist or m.playlist_type == 'VOD')
 60 | 
 61 | def crypto_m3u8(url, **kwargs):
 62 |     '''Params: as same as _load().'''
 63 |     m = _load(url, **kwargs)
 64 |     for k in m.keys:
 65 |         try:
 66 |             assert not k.uri.startswith('skd:'), 'Unsupported FairPlay Streaming'
 67 |         except AttributeError:
 68 |             pass
 69 |     return any(m.keys + m.session_keys)  # ignore method NONE
 70 | 
 71 | def _get_stream_info(l, name):
 72 |     return getattr(getattr(l, 'stream_info',
 73 |                            getattr(l, 'iframe_stream_info', None)),
 74 |                    name)
 75 | 
 76 | def load_m3u8_playlist(url, **kwargs):
 77 |     '''Params: as same as _load().'''
 78 | 
 79 |     def append_stream(stream_id, stream_profile, urls):
 80 |         streams[stream_id] = {
 81 |             'container': 'm3u8',
 82 |             'profile': stream_profile,
 83 |             'src': urls
 84 |         }
 85 | 
 86 |     streams = MediaStreams()
 87 |     m = _load(url, **kwargs)
 88 |     ll = m.playlists or m.iframe_playlists
 89 |     if ll:
 90 |         for l in ll:
 91 |             resolution = _get_stream_info(l, 'resolution')
 92 |             if resolution:
 93 |                 append_stream(*format_vps(*resolution), [l.absolute_uri])
 94 |             else:
 95 |                 bandwidth = str(_get_stream_info(l, 'bandwidth'))
 96 |                 append_stream(bandwidth, bandwidth, [l.absolute_uri])
 97 |     else:
 98 |         append_stream('current','current', [url])
 99 |     return streams
100 | 
def load_m3u8(url, **kwargs):
    '''Resolve the playlist at `url` into lists of segment URLs.

    For a master playlist the variant with the highest bandwidth is used,
    together with its 'AUDIO' and 'SUBTITLES' media if there are any.

    Params: as same as _load().

    Returns a tuple `(urls, audio, subtitle)` of lists. `audio` is emptied
    when it starts with the same URL as `urls`.
    Raises NotImplementedError for live streams.
    '''

    def load_media(l=None, m=None):
        # `l` is an entry to be loaded first, `m` an already loaded playlist
        if l:
            m = _load(l.absolute_uri, **kwargs)
        if not m:
            return []
        return [seg.absolute_uri for seg in m.segments]

    if live_m3u8(url, **kwargs):
        live_error()
    master = _load(url, **kwargs)
    variants = master.playlists or master.iframe_playlists
    media = {}
    if variants:
        variants.sort(key=lambda v: _get_stream_info(v, 'bandwidth'))
        best = variants[-1]
        media = {e.type: e for e in getattr(best, 'media', [])}
        urls = load_media(l=best)
    else:
        urls = load_media(m=master)
    audio = load_media(media.get('AUDIO'))
    subtitle = load_media(media.get('SUBTITLES'))
    if audio and urls[0] == audio[0]:
        audio.clear()
    return urls, audio, subtitle
130 | 


--------------------------------------------------------------------------------
/ykdl/util/match.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | 
  4 | __all__ = ['match', 'fullmatch', 'match1', 'matchm', 'matchall']
  5 | 
def _format_str(pattern, string):
    '''Format the target which will be scanned, makes the worker happy.

    Converts `string` to the type of `pattern` (str or bytes), UTF-8 is used
    whenever text has to be encoded or decoded.

    Returns the converted object, or `string` itself if it already has the
    type of `pattern`.
    '''
    strtype = type(pattern)
    if not isinstance(string, strtype):
        try:
            # direct conversion: str <=> bytes
            string = strtype(string, 'utf-8')
        except TypeError:
            if isinstance(string, bytearray):
                string = bytes(string)
            else:
                # try the common ways to get the content out of an object,
                # the first method which succeeds wins
                for n in ('getvalue', 'tobytes', 'read', 'encode', 'decode'):
                    f = getattr(string, n, None)
                    if f:
                        try:
                            string = f()
                            break
                        except:
                            pass
                if not isinstance(string, (str, bytes)):
                    try:
                        if isinstance(string, int):  # defense memory burst
                            # bytes(int) would allocate that many zero bytes;
                            # re-raise the TypeError being handled to reach
                            # the str() fallback below
                            raise
                        string = strtype(string)
                    except:
                        string = str(string)
            # the fallbacks may have produced the other string type
            if not isinstance(string, strtype):
                string = strtype(string, 'utf-8')
    return string
 34 | 
 35 | def match(obj, *patterns):
 36 |     '''Scans a object for matched some patterns with capture mode (matches first).
 37 | 
 38 |     Params:
 39 |         `obj`, any object which contains string data.
 40 |         `patterns`, arbitrary number of regex patterns.
 41 | 
 42 |     Returns the first Match object, or None.
 43 |     '''
 44 |     for pattern in patterns:
 45 |         string = _format_str(pattern, obj)
 46 |         m = re.search(pattern, string)
 47 |         if m:
 48 |             return m
 49 |     return None
 50 | 
 51 | def fullmatch(obj, *patterns):
 52 |     '''Scans a object for fully matched some patterns (matches first).
 53 | 
 54 |     Params: same as match()
 55 | 
 56 |     Returns the match string, or None.
 57 |     '''
 58 |     for pattern in patterns:
 59 |         string = _format_str(pattern, obj)
 60 |         m = re.fullmatch(pattern, string)
 61 |         if m:
 62 |             return m.string
 63 |     return None
 64 | 
 65 | def match1(obj, *patterns):
 66 |     '''Scans a object for matched some patterns with capture mode.
 67 | 
 68 |     Params: same as match()
 69 | 
 70 |     Returns the first captured substring, or None.
 71 |     '''
 72 |     m = match(obj, *patterns)
 73 |     return m and m.groups()[0]
 74 | 
 75 | def matchm(obj, *patterns):
 76 |     '''Scans a object for matched some patterns with capture mode.
 77 | 
 78 |     Params: same as match()
 79 | 
 80 |     Returns all captured substrings of the first Match object, or same number of
 81 |     None objects.
 82 |     '''
 83 |     m = match(obj, *patterns)
 84 |     return m and m.groups() or (None,) * re.compile(patterns[0]).groups
 85 | 
 86 | 
 87 | def matchall(obj, *patterns):
 88 |     '''Scans a object for matched some patterns with capture mode.
 89 | 
 90 |     Params: same as match()
 91 | 
 92 |     Returns a list of all the captured substring of matches, or a empty list.
 93 |     If a conformity form of captures in the list has be excepted, all the regex
 94 |     patterns MUST include a similar capture mode.
 95 |     '''
 96 |     ret = []
 97 |     for pattern in patterns:
 98 |         string = _format_str(pattern, obj)
 99 |         m = re.findall(pattern, string)
100 |         ret += m
101 | 
102 |     return ret
103 | 


--------------------------------------------------------------------------------
/ykdl/util/xml2dict.py:
--------------------------------------------------------------------------------
  1 | '''A simple XML document parser which used builtin expat, output a dict with
  2 |  Python data type, likes json package.'''
  3 | 
  4 | from logging import getLogger
  5 | 
  6 | 
  7 | logger = getLogger(__name__)
  8 | 
# Literal texts which _convert() maps to a fixed Python object.
_cdict = {  # special objects
     'true': True,
    'false': False,
      'NaN': float('nan'),
      'INF': float('inf'),
     '-INF': float('-inf')
}
# Namespace URI an "xsi" prefix must be bound to before xml2dict() honors
# the `xsi:nil` attribute.
xml_schema_instance = 'http://www.w3.org/2001/XMLSchema-instance'
 17 | 
 18 | def _convert(text):
 19 |     if text in _cdict:
 20 |         return _cdict[text]
 21 |     if text.isdecimal():
 22 |         return int(text)
 23 |     if text.count('e') == 1 or text.count('.') == 1:
 24 |         try:
 25 |             f = float(text)
 26 |         except ValueError:
 27 |             pass
 28 |         else:
 29 |             if text.count('e') and not f % 1:
 30 |                 return int(f)  # e.g. 2.2e2 => 220
 31 |             return f
 32 |     return text
 33 | 
 34 | def _get1(l):
 35 |     # unpack standalone element from list
 36 |     if isinstance(l, list) and len(l) == 1:
 37 |         o = l[0]
 38 |         if not isinstance(o, dict):  # contribute to compatibility
 39 |             return o
 40 |     return l
 41 | 
 42 | def xml2dict(source):
 43 |     '''Convert giving XML document to a dict object.'''
 44 |     from xml.parsers import expat
 45 |     # don't expand namespace, handle them ourself
 46 |     parser = expat.ParserCreate(namespace_separator=None)
 47 |     parser.buffer_text = True
 48 |     root = {'#text': []}
 49 |     xml = {  # default properties
 50 |         'version': '1.0',
 51 |         'encoding': 'UTF-8',
 52 |         'standalone': -1,
 53 |         'rootname': 'root',
 54 |         'root': root,
 55 |         }
 56 |     parent_nodes = []
 57 |     isCDATA = False
 58 | 
 59 |     def default(data):
 60 |         if data.strip():
 61 |             logger.debug('Unhandled XML data: %r', data)
 62 | 
 63 |     def startXML(version, encoding, standalone):
 64 |         xml['version'] = version
 65 |         xml['encoding'] = encoding
 66 |         xml['standalone'] = standalone
 67 | 
 68 |     def getNSPrefix(ns):
 69 |         nodes = parent_nodes.copy()
 70 |         while nodes:
 71 |             xmlns = nodes.pop().get('@xmlns')
 72 |             if ns in xmlns:
 73 |                 return xmlns[ns]
 74 | 
 75 |     def sortAttributes(attributes):
 76 |         if not attributes:
 77 |             return {}
 78 |         xmlns = {}
 79 |         attrs = {}
 80 |         for k, v in attributes.items():
 81 |             ks = k.split(':', 1)
 82 |             if ks[0] == 'xmlns':
 83 |                 if len(ks) == 2 :
 84 |                     k = ks[1]
 85 |                     assert k, 'Missing namespace declaration prefix!'
 86 |                 else:
 87 |                     k = ''
 88 |                 xmlns[k] = v
 89 |             else:
 90 |                 attrs['@' + k] = _convert(v)
 91 |         if xmlns:
 92 |             attrs['@xmlns'] = xmlns
 93 |         return attrs
 94 | 
 95 |     def startRoot(name, attributes):
 96 |         xml['rootname'] = name
 97 |         parent_nodes.append(root)
 98 |         if attributes:
 99 |             root.update(sortAttributes(attributes))
100 | 
101 |     def startElement(name, attributes):
102 |         if not parent_nodes:
103 |             return startRoot(name, attributes)
104 |         node = sortAttributes(attributes)
105 |         node['#text'] = []
106 |         parent_node = parent_nodes[-1]
107 |         if name not in parent_node:
108 |             parent_node[name] = []
109 |         parent_node[name].append(node)
110 |         parent_nodes.append(node)
111 | 
112 |     def startCDATA():
113 |         nonlocal isCDATA
114 |         isCDATA = True
115 | 
116 |     def endCDATA():  # void handle to skip default
117 |         pass
118 | 
119 |     def characters(data):
120 |         data = data.strip()
121 |         if data:
122 |             parent_nodes[-1]['#text'].append(data)
123 | 
124 |     def endElement(name):
125 | 
126 |         def replaceNode(data):
127 |             parent_node = parent_nodes[-1][name]
128 |             assert parent_node.pop() is node, 'Unkown error during endElement()'
129 |             parent_node.append(data)
130 | 
131 |         nonlocal isCDATA
132 |         node = parent_nodes.pop()
133 |         if node.get('@xsi:nil') in (True, 1) and \
134 |                                     getNSPrefix('xsi') == xml_schema_instance:
135 |             replaceNode(None)
136 |         else:
137 |             text = node.pop('#text')
138 |             if node:
139 |                 node.update({k: _get1(v) for k, v in node.items()})
140 |             if text:
141 |                 name = parent_nodes and name or None
142 |                 if name and not node and len(text) == 1 and not isCDATA:
143 |                     data = _convert(text[0])  # no attributes & sub-elements
144 |                 else:
145 |                     data = '\n'.join(text)  # prefer a string than a list
146 |                 if name and not node:
147 |                     replaceNode(data)  # no attributes & sub-elements
148 |                 else:
149 |                     node['#text'] = data
150 |             elif not node:
151 |                 replaceNode('')  # placeholder use to keep the structure
152 |         isCDATA = False  # ends here, not CDATA's end
153 | 
154 |     parser.DefaultHandler = default
155 |     parser.XmlDeclHandler = startXML
156 |     parser.StartElementHandler = startElement
157 |     parser.EndElementHandler = endElement
158 |     parser.StartCdataSectionHandler = startCDATA
159 |     parser.EndCdataSectionHandler = endCDATA
160 |     parser.CharacterDataHandler = characters
161 | 
162 |     if isinstance(source, str):
163 |         parser.Parse(source, True)
164 |     elif hasattr(source, 'read'):
165 |         parser.ParseFile(source)
166 |     else:
167 |         for s in source:
168 |             parser.Parse(s, False)
169 |         parser.Parse(type(s)(), True)
170 |     return xml
171 | 


--------------------------------------------------------------------------------
/ykdl/version.py:
--------------------------------------------------------------------------------
# Version string of the package.
__version__ = '1.8.3.dev'
2 | 


--------------------------------------------------------------------------------