├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── CHANGELOG.rst ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── cykdl ├── __init__.py └── __main__.py ├── live.mk ├── pyproject.toml ├── setup.cfg ├── setup.py ├── video.mk ├── webykdl ├── README.rst ├── dbpc ├── dbusplayer.py ├── playthread.py └── webykdl.py └── ykdl ├── __init__.py ├── common.py ├── compact.py ├── extractor.py ├── extractors ├── __init__.py ├── _byted.py ├── _byted_X-Bogus.js ├── _byted_acrawler.js ├── _common.py ├── acfun │ ├── __init__.py │ ├── acbase.py │ ├── bangumi.py │ ├── live.py │ └── video.py ├── baomihua.py ├── bilibili │ ├── __init__.py │ ├── bangumi.py │ ├── bilibase.py │ ├── idconvertor.py │ ├── live.py │ ├── util.py │ ├── vc.py │ └── video.py ├── cctv.py ├── douban │ ├── __init__.py │ ├── movie.py │ └── music.py ├── douyin │ ├── __init__.py │ ├── live.py │ └── video.py ├── douyu │ ├── __init__.py │ ├── crypto-js-md5.min.js │ ├── live.py │ ├── util.py │ └── video.py ├── fun.py ├── generalembed.py ├── generalsimple.py ├── heibaizhibo.m.js ├── heibaizhibo.py ├── huajiao │ ├── __init__.py │ ├── live.py │ └── video.py ├── huya │ ├── __init__.py │ ├── live.py │ └── video.py ├── ifeng │ ├── __init__.py │ ├── gongkaike.py │ ├── news.py │ └── video.py ├── iqilu.py ├── iqiyi │ ├── __init__.py │ ├── cmd5x.js │ ├── cmd5x_iqiyi3.js │ ├── live.py │ ├── util.py │ └── video.py ├── ixigua.py ├── joy.py ├── kankanews.py ├── ku6.py ├── kuwo.py ├── laifeng.py ├── le.py ├── lizhi.py ├── longzhu.py ├── mgtv.py ├── miaopai.py ├── netease │ ├── __init__.py │ ├── live.py │ ├── livecc.py │ ├── m3g.py │ ├── music │ │ ├── __init__.py │ │ ├── music.py │ │ ├── musicbase.py │ │ ├── mv.py │ │ └── program.py │ ├── openc.py │ └── video.py ├── pps.py ├── pptv.py ├── qq │ ├── __init__.py │ ├── egame.py │ ├── live.py │ └── video.py ├── sina │ ├── __init__.py │ ├── embed.py │ ├── openc.py │ └── video.py ├── singlemultimedia.py ├── sohu │ ├── __init__.py │ ├── 
my.py │ ├── sohubase.py │ └── tv.py ├── tudou.py ├── weibo.py ├── yinyuetai.py ├── yizhibo.py ├── youku.py ├── youkujs.py ├── zhangyu.py ├── zhanqi.py └── zhuafan.py ├── mediainfo.py ├── util ├── __init__.py ├── download.py ├── external.py ├── fs.py ├── http.py ├── human.py ├── kt_player.py ├── lazy.py ├── log.py ├── m3u8.py ├── match.py ├── rangefetch_server.py ├── wrap.py └── xml2dict.py └── version.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 在提交前,请确保您已经检查了以下内容! 2 | 3 | - [ ] 你的问题无关 VIP 内容。**YKDL** 不会主动支持 VIP 内容,相关问题一概不予受理。 4 | - [ ] 你确实可以在浏览器或移动端中正常播放,但不能使用 **YKDL** 下载。 5 | - [ ] 你的 **YKDL** 为最新发布版本,或开发版本。 6 | - [ ] 你已经阅读并按 https://github.com/SeaHOH/ykdl/wiki 中的指引进行了操作。 7 | - [ ] 你的问题没有在 https://github.com/SeaHOH/ykdl/issues 报告,否则请在原有 issue 下报告。 8 | - [ ] 本问题确实关于 **YKDL** 或其依赖,而不是其他项目。 9 | 10 | 请将错误输出粘贴在下面: 11 | 12 | ``` 13 | [在这里粘贴完整日志] 14 | ``` 15 | 16 | 如果你有其他附言,例如问题只在某个视频发生,或者是一般性讨论或者提出新功能,请在下面添加: 17 | 18 | [在这里输入内容文本] 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | /dist/ 3 | /MANIFEST 4 | dist* 5 | *.egg-info/ 6 | *.py[cod] 7 | 8 | _*/ 9 | *_ 10 | 11 | *.bak 12 | *.download 13 | *.cmt.* 14 | *.3gp 15 | *.asf 16 | *.flv 17 | *.f4v 18 | *.lrc 19 | *.mkv 20 | *.mp3 21 | *.mp4 22 | *.mpg 23 | *.aac 24 | *.ts 25 | *.webm 26 | README.html 27 | 28 | *.DS_Store 29 | *.swp 30 | *~ 31 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | To contributers 2 | 3 | 1, free to approve/merge any 
extractors pull requests, including new sites support. 4 | 5 | 2, free to git push any changes to extractors without my approve, including new sites support. 6 | 7 | 3, any changes to core, need submit pull requests and approve/merge by me. 8 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ============================================== 2 | This is a copy of the MIT license. 3 | ============================================== 4 | Copyright (C) 2012 - 2016 Mort Yao 5 | Copyright (C) 2012 Boyu Guo 6 | Copyright (C) 2015 - 2021 Zhang Ning 7 | Copyright (C) 2017 - 2022 SeaHOH 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of 10 | this software and associated documentation files (the "Software"), to deal in 11 | the Software without restriction, including without limitation the rights to 12 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 13 | of the Software, and to permit persons to whom the Software is furnished to do 14 | so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include README.rst 3 | include CHANGELOG.rst 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include video.mk 2 | include live.mk 3 | 4 | test: 5 | 6 | test_video: test_video1 test_video2 test_video3 7 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | YouKuDownLoader 2 | =============== 3 | 4 | .. image:: https://img.shields.io/pypi/v/ykdl.svg 5 | :target: https://pypi.python.org/pypi/ykdl 6 | 7 | 8 | A video downloader focus on China mainland video sites. 9 | 10 | Origin website: https://github.com/zhangn1985/ykdl 11 | 12 | Later migrated to a new website: https://github.com/SeaHOH/ykdl 13 | 14 | Today it migrated to my repositories, and this is the new website: https://github.com/LifeActor/ykdl 15 | 16 | This project is a fork of 17 | `you-get `_ with below changes. 18 | 19 | - Structured source code. 20 | - Focus on China mainland video sites. 21 | - Dropped supports of Python 3.4 and below 22 | (see `#487 `_). 23 | 24 | Simple installation guide 25 | ------------------------- 26 | 27 | There are some useful software package managers. 28 | 29 | - **macOS/Linux**: `Homebrew `_ 30 | - **Debian/Linux**: APT 31 | - **Windows**: `Chocolatey `_ 32 | 33 | Step: 34 | 0. Dependencies 35 | 36 | | `FFmpeg `_, for merge media files. 37 | | `mpv `_, default media player (optimal compatibility). 38 | 39 | #. `Python 3 `_ 40 | 41 | #. pip and setuptools, make sure they are updated. 42 | 43 | .. 
code-block:: console 44 | 45 | python3 -m ensurepip 46 | python3 -m pip install pip --upgrade 47 | python3 -m pip install setuptools --upgrade 48 | 49 | #. ykdl from PyPI or GitHub 50 | 51 | .. code-block:: console 52 | 53 | pip3 install ykdl --upgrade 54 | 55 | .. code-block:: console 56 | 57 | pip3 install https://github.com/SeaHOH/ykdl/archive/master.zip --force-reinstall --no-deps 58 | pip3 install https://github.com/SeaHOH/ykdl/archive/master.zip --upgrade 59 | 60 | #. Make sure these folders are in your **PATH**; if they are not, add them. 61 | 62 | | **Windows**: folders of ffmpeg.exe, mpv.exe, and python.exe, 63 | and folder "<**PYTHONHOME**>\\Scripts" 64 | | **others**: "~/.local/bin" or "/usr/local/bin" 65 | 66 | Site status 67 | ----------- 68 | 69 | Please check wiki page: 70 | `sites-status `_ 71 | 72 | Bug reports, feature requests, and pull requests are welcome. 73 | 74 | Project Ownership Transition 75 | ---------------------------- 76 | 77 | I am pleased to announce that I have taken over the ownership of the YouKuDownLoader project from @SeaHOH today. This transition marks the beginning of an exciting new chapter for the project, and I am committed to ensuring its continued development and success. 78 | 79 | Future Plans 80 | ---------------------------- 81 | 82 | - **Enhanced Features**: I plan to introduce new features that will improve the user experience and expand the capabilities of the downloader. 83 | - **Regular Updates**: Regular updates will be pushed to ensure compatibility with the latest video sites and technologies. 84 | - **Community Engagement**: I will actively engage with the community to gather feedback and suggestions for future enhancements. 85 | - **Bug Fixes**: Addressing existing bugs and issues to ensure a smooth and reliable user experience. 86 | 87 | Your support and contributions are invaluable to the project's success. 
Together, we can make YouKuDownLoader an even more powerful tool for downloading videos from China mainland video sites. 88 | 89 | Thank you for your continued support! 90 | 91 | --- 92 | 93 | *Note: For any inquiries or suggestions, please feel free to open an issue or reach out directly.* 94 | -------------------------------------------------------------------------------- /cykdl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LifeActor/ykdl/44cba0d088787a4c9aed62bba1ff84c7e4066c15/cykdl/__init__.py -------------------------------------------------------------------------------- /live.mk: -------------------------------------------------------------------------------- 1 | test_live: test_bobo test_douyu test_huomao test_longzhu test_panda test_zhanqi test_bililive test_huyalive test_lelive test_cc test_qqlive 2 | 3 | PYTHON ?= python3 4 | 5 | test_bobo: 6 | ${PYTHON} -m cykdl -i http://www.bobo.com/10003822?f=pHome.Hot_anchor.1 7 | 8 | test_douyu: 9 | ${PYTHON} -m cykdl -i http://www.douyu.com/58428 10 | 11 | test_huomao: 12 | ${PYTHON} -m cykdl -i http://www.huomaotv.cn/live/845 13 | 14 | test_longzhu: 15 | ${PYTHON} -m cykdl -i http://star.longzhu.com/133097?from=challcontent 16 | 17 | test_panda: 18 | ${PYTHON} -m cykdl -i http://www.panda.tv/60995 19 | 20 | test_zhanqi: 21 | ${PYTHON} -m cykdl -i https://www.zhanqi.tv/naigege 22 | 23 | test_bililive: 24 | ${PYTHON} -m cykdl -i http://live.bilibili.com/3 25 | 26 | test_huyalive: 27 | ${PYTHON} -m cykdl -i http://www.huya.com/lengsimo 28 | 29 | test_lelive: 30 | ${PYTHON} -m cykdl -i http://live.le.com/lunbo/play/index.shtml?channel=224 31 | 32 | test_cc: 33 | ${PYTHON} -m cykdl -i http://cc.163.com/30348786/ 34 | 35 | test_qqlive: 36 | ${PYTHON} -m cykdl -i http://live.qq.com/10001075 37 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | # These are the assumed default build requirements from pip: 3 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support 4 | requires = ["setuptools>=43.0.0", "wheel"] 5 | build-backend = "setuptools.build_meta" 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build] 2 | force = 0 3 | 4 | [global] 5 | verbose = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from setuptools import setup, find_packages 4 | import os 5 | import re 6 | 7 | 8 | def read_file(*paths): 9 | with open(os.path.join(here, *paths), 'r', encoding='utf-8') as fp: 10 | return fp.read() 11 | 12 | def get_version(): 13 | content = read_file('ykdl', 'version.py') 14 | version_match = re.search('^__version__ = [\'"]([^\'"]+)', content, re.M) 15 | if version_match: 16 | return version_match.group(1) 17 | raise RuntimeError('Unable to find version string.') 18 | 19 | # memo: pycryptodome is not being used now 20 | REQ = [ 21 | 'm3u8>=1.0.0', 22 | 'jsengine>=1.0.5', 23 | "colorama;os_name=='nt'", 24 | ] 25 | EXT = { 26 | 'proxy': ['ExtProxy'], 27 | 'br': ['BrotliCFFI'], 28 | } 29 | EXT['net'] = sum(EXT.values(), []) 30 | EXT['js'] = ['quickjs'] 31 | EXT['all'] = list(set(sum((EXT.values()), []))) 32 | 33 | here = os.path.abspath(os.path.dirname(__file__)) 34 | LONGS = '\n\n'.join(( 35 | read_file('README.rst'), 36 | *read_file('CHANGELOG.rst').split('\n\n\n')[:4], 37 | '`See full change log ' 38 | '`_.\n' 39 | )) 40 | 41 | 42 | setup( 43 | name = 'ykdl', 44 | version = get_version(), 45 | author = 'Zhang Ning', 46 
| author_email = 'zhangn1985@gmail.com', 47 | maintainer = 'SeaHOH', 48 | maintainer_email = 'seahoh@gmail.com', 49 | url = 'https://github.com/SeaHOH/ykdl', 50 | license = 'MIT', 51 | description = 'a video downloader written in Python', 52 | long_description = LONGS, 53 | keywords = 'video download youku acfun bilibili', 54 | packages = find_packages(here), 55 | install_requires = REQ, 56 | extras_require = EXT, 57 | platforms = 'any', 58 | zip_safe = True, 59 | package_data = { 60 | 'ykdl': ['extractors/*.js', 'extractors/*/*.js'], 61 | }, 62 | python_requires = '>=3.5', 63 | 64 | classifiers = [ 65 | 'Development Status :: 4 - Beta', 66 | 'Environment :: Console', 67 | 'Intended Audience :: Developers', 68 | 'Intended Audience :: End Users/Desktop', 69 | 'License :: OSI Approved :: MIT License', 70 | 'Operating System :: OS Independent', 71 | 'Programming Language :: Python', 72 | 'Programming Language :: Python :: 3', 73 | 'Programming Language :: Python :: 3.5', 74 | 'Programming Language :: Python :: 3.6', 75 | 'Programming Language :: Python :: 3.7', 76 | 'Programming Language :: Python :: 3.8', 77 | 'Programming Language :: Python :: 3.9', 78 | 'Programming Language :: Python :: 3.10', 79 | 'Topic :: Internet', 80 | 'Topic :: Internet :: WWW/HTTP', 81 | 'Topic :: Multimedia', 82 | 'Topic :: Multimedia :: Sound/Audio', 83 | 'Topic :: Multimedia :: Video', 84 | 'Topic :: Utilities' 85 | ], 86 | entry_points = { 87 | 'console_scripts': ['ykdl=cykdl.__main__:main'] 88 | }, 89 | ) 90 | -------------------------------------------------------------------------------- /video.mk: -------------------------------------------------------------------------------- 1 | test_video1: test_youku test_acfun test_bilibili test_ifeng test_163m test_sohutv test_cctv test_tudou 2 | test_video2: test_iqilu test_iqiyi test_joy test_ku6 test_kuwo test_sina test_qq test_sohumy test_baomihua 3 | test_video3: test_xiami test_yinyuetai test_baidu test_douban test_huya test_163v test_le 
test_mgtv 4 | 5 | PYTHON ?= python3 6 | 7 | test_youku: 8 | ${PYTHON} -m cykdl -i http://v.youku.com/v_show/id_XMTYwMDIxNDI2MA==.html 9 | 10 | test_acfun: 11 | ${PYTHON} -m cykdl -i http://www.acfun.cn/v/ac213736 12 | 13 | test_bilibili: 14 | ${PYTHON} -m cykdl -i http://bangumi.bilibili.com/anime/2539/play#63470 15 | 16 | test_baomihua: 17 | ${PYTHON} -m cykdl -i http://www.baomihua.com/user/24204_36300935 18 | 19 | test_cctv: 20 | ${PYTHON} -m cykdl -i http://tv.cctv.com/2016/06/08/VIDEa0Y5V5HY9MLeoVM5tcQC160608.shtml -t 300 21 | 22 | test_ifeng: 23 | ${PYTHON} -m cykdl -i http://v.ifeng.com/video_8632601.shtml 24 | 25 | test_iqilu: 26 | ${PYTHON} -m cykdl -i http://v.iqilu.com/shpd/rmxf/2016/0607/4332820.html 27 | 28 | test_iqiyi: 29 | ${PYTHON} -m cykdl -i http://www.iqiyi.com/v_19rrle48gg.html 30 | 31 | test_joy: 32 | ${PYTHON} -m cykdl -i http://www.joy.cn/video?resourceId=60239051 33 | 34 | test_ku6: 35 | ${PYTHON} -m cykdl -i http://www.ku6.com/video/detail?id=lfx8PD61clQ0knUJQad1R4Mbu2w 36 | 37 | test_kuwo: 38 | ${PYTHON} -m cykdl -i http://www.kuwo.cn/yinyue/7119332?catalog=yueku2016 39 | 40 | test_lizhi: 41 | ${PYTHON} -m cykdl -i http://www.lizhi.fm/202840/29101368624039686 42 | 43 | test_sina: 44 | ${PYTHON} -m cykdl -i 'http://video.sina.com.cn/ent/#250623748' -t 300 45 | 46 | test_xiami: 47 | ${PYTHON} -m cykdl -li http://www.xiami.com/album/2100285370?spm=a1z1s.3057849.0.0.hAuVwv 48 | 49 | test_yinyuetai: 50 | ${PYTHON} -m cykdl -i http://v.yinyuetai.com/video/2832181?f=SY-MKDT-MVSB-1 51 | 52 | test_baidu: 53 | ${PYTHON} -m cykdl -li http://music.baidu.com/album/266327865?pst=shoufa 54 | 55 | test_douban: 56 | ${PYTHON} -m cykdl -li https://music.douban.com/artists/player/?sid=660498,647629,647625,633870,622482,600594,589516,588385,583322,580114,576350 57 | 58 | test_huya: 59 | ${PYTHON} -m cykdl -i http://v.huya.com/play/2209082.html 60 | 61 | test_le: 62 | ${PYTHON} -m cykdl -i http://www.le.com/ptv/vplay/26859747.html -t 300 63 | 64 | test_163v: 
65 | ${PYTHON} -m cykdl -i http://v.163.com/paike/VBI038VCL/VBNERA654.html 66 | 67 | test_163m: 68 | ${PYTHON} -m cykdl -li http://music.163.com/playlist?id=396542983 69 | 70 | test_qq: 71 | ${PYTHON} -m cykdl -i http://v.qq.com/cover/q/qsm7nxzwbnzc4dp.html?vid=m0305m0ur33 72 | 73 | test_sohutv: 74 | ${PYTHON} -m cykdl -i http://tv.sohu.com/20160607/n453456746.shtml 75 | 76 | test_sohumy: 77 | ${PYTHON} -m cykdl -i http://my.tv.sohu.com/pl/9090402/84077110.shtml 78 | 79 | test_mgtv: 80 | ${PYTHON} -m cykdl -i http://www.mgtv.com/v/2/293140/c/3269011.html 81 | 82 | test_tudou: 83 | ${PYTHON} -m cykdl -i http://video.tudou.com/v/XMjc2MTg1MzIzNg==.html 84 | -------------------------------------------------------------------------------- /webykdl/README.rst: -------------------------------------------------------------------------------- 1 | WebYKDL 2 | =============== 3 | a Flask web interface for YKDL 4 | 5 | playerthread: a threaded mpv player class 6 | 7 | dbusplayer: a dbus interface for playerthread 8 | 9 | dbpc: cmdline interface for dbusplayer 10 | 11 | webykdl: flask web interface for ykdl 12 | -------------------------------------------------------------------------------- /webykdl/dbpc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys, json, os.path 5 | from pydbus import SessionBus 6 | bus = SessionBus() 7 | 8 | player = bus.get("github.zhangn1985.dbplay") 9 | 10 | if len(sys.argv) == 1: 11 | print(sys.argv[0] + " URI") 12 | 13 | for u in sys.argv[1:]: 14 | i = json.dumps({"urls": [os.path.abspath(u)]}) 15 | player.play(i) 16 | -------------------------------------------------------------------------------- /webykdl/dbusplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from gi.repository import GLib 5 | from pydbus import SessionBus 6 | import json 7 | 
class Mpvplayer(Thread):
    '''Background thread that plays queued playback requests one at a time.

    Each queued request is a JSON string holding an object with the keys
    ``urls`` and ``ext`` (both required) and ``args`` (optional keyword
    arguments forwarded to ``launch_player``).
    '''

    # Seconds to sleep between two polls of an empty playlist.
    poll_interval = 10

    def __init__(self):
        Thread.__init__(self)
        self.playlist = []
        self.name = 'mpv playback thread'
        self.handle = None
        # Stop flag read by run(). The historical attribute name is kept so
        # that existing code touching it keeps working.
        self.__exit__ = False

    def play(self, obj):
        '''Queue the JSON request string `obj` for playback and return 0.'''
        self.playlist.append(obj)
        return 0

    def stop(self):
        '''Terminate the most recently launched player, if there is one.'''
        if self.handle:
            self.handle.terminate()

    def exit(self):
        '''Ask the playback loop to finish and stop the current player.'''
        self.__exit__ = True
        self.stop()

    def _play_next(self):
        '''Pop the oldest request and play it until the player exits.

        Returns True when a player was launched, False when the request was
        malformed and therefore skipped.
        '''
        # Local import: this module does not import logging at file level.
        import logging
        raw = self.playlist.pop(0)
        try:
            obj = json.loads(raw)
            urls = obj['urls']
            ext = obj['ext']
        except (TypeError, ValueError, KeyError) as e:
            # A bad request used to raise out of run() and silently end the
            # playback thread; report it and keep serving the queue instead.
            logging.getLogger(__name__).error(
                'skipping malformed play request %r: %r', raw, e)
            return False
        if 'args' in obj:
            args = obj['args']
        else:
            args = {'ua': '', 'header': '', 'title': '', 'referer': ''}
        self.handle = launch_player(urls, ext, **args)
        self.handle.wait()
        return True

    def run(self):
        '''Play queued requests in FIFO order until exit() is called.'''
        while not self.__exit__:
            if not self.playlist:
                sleep(self.poll_interval)
                continue
            self._play_next()
@app.route('/play', methods=['POST', 'GET'])
def play():
    '''Resolve the posted URL with ykdl and queue the result for playback.

    POST form fields:
        url:  the page URL to resolve (required).
        list: the literal string 'True' selects the module's `parser_list`
              instead of `parser`; anything else, or absence, means a
              single video.

    Returns 'OK' after queueing, the assertion message when the URL cannot
    be resolved or parsed, or a usage hint for GET requests.
    '''
    if request.method != 'POST':
        return 'curl --data-urlencode "url=" http://IP:5000/play'

    url = request.form['url']
    # A missing field simply means "not a list"; no exception juggling needed.
    islist = request.form.get('list') == 'True'

    try:
        # url_to_module() signals a malformed URL with AssertionError as
        # well, so it has to be inside the same guard as the parser call.
        m, u = url_to_module(url)
        parser = m.parser_list if islist else m.parser
        info = parser(u)
    except AssertionError as e:
        return str(e)

    if isinstance(info, (types.GeneratorType, list)):
        for i in info:
            handle_videoinfo(i)
    else:
        handle_videoinfo(info)
    return 'OK'
77 | 输入视频网址: 78 | 播放列表? 79 | 80 |
81 |
82 | 83 |
# Raw string: the pattern is full of regex escapes (\w, \d, \., \- ...) that
# are invalid *string* escapes and trigger SyntaxWarning on recent Pythons.
_URL_INFO_PATTERN = re.compile(r'''(?x)
    https?://
    (                       # catch host
        [\-\w\.]*?          # ignore
        (?:([\-\w]+)\.)?    # try catch 3rd domain
        ([\-\w]+)\.         # catch 2nd domain
        [\-\w]+             # top domain
    )
    (?::\d+)?               # allow port
    (?=/|$)                 # allow empty path
    (?:
        /                   # path start
        .+?                 # path & main name
        (?:\.(\w+))?        # try catch extension name
        (?:\?|\#|&|$)       # path end, '&' is used to ignore wrong query
    )?
    ''')


def url_to_module(url):
    '''Pick the extractor responsible for `url`.

    The second-level domain (or the third-level one when the second is in
    `exclude_list`) is mapped through `alias` and imported as
    ``ykdl.extractors.<name>``. When no such module can be imported, the
    URL's extension name is checked against the single-multimedia types,
    then one HEAD request is made to follow HTTP redirections and the
    lookup is retried with the new URL. If there is no redirection, the
    general extractors are used as the last resort.

    Returns a tuple ``(site, url)``; `url` may differ from the argument.
    Raises AssertionError when `url` is not a parsable HTTP(S) URL.
    '''
    redirection = False
    while True:
        # Test for the full scheme so that a host which merely begins with
        # "http" (e.g. "httpbin.org/...") still gets a scheme prepended.
        if not url.startswith(('http://', 'https://')):
            logger.warning('> url not starts with http(s) ' + url)
            logger.warning('> assume http connection!')
            url = 'http://' + url
        url_infos = _URL_INFO_PATTERN.match(url)
        if not url_infos:
            # Raised explicitly (not via `assert`) so that the check also
            # works under `python -O`; the exception type is unchanged.
            raise AssertionError('wrong URL string!')
        host, dm3, dm2, ext = url_infos.groups()
        logger.debug('host> ' + host)

        # Use the 3rd-level domain only when the 2nd-level one is generic
        # (com/net/org) and a 3rd-level domain was actually captured.
        short_name = dm3 if (dm2 in exclude_list and dm3) else dm2
        if short_name in alias:
            short_name = alias[short_name]
        logger.debug('short_name> ' + short_name)

        try:
            m = import_module('.'.join(['ykdl', 'extractors', short_name]))
            if hasattr(m, 'get_extractor'):
                site, url = m.get_extractor(url)
            else:
                site = m.site
            return site, url

        except ImportError as e:
            logger.debug('Import Error: %s', e)

        from .extractors import singlemultimedia

        if ext in singlemultimedia.extNames:
            logger.debug('> the extension name %r match multimedia types', ext)
            logger.debug('> Go SingleMultimedia')
            return singlemultimedia.site, url

        # Only the first pass issues the HEAD request; on the second pass
        # `response` still belongs to the (already redirected) `url`.
        if not redirection:
            logger.debug('> Try HTTP Redirection!')
            response = get_head_response(url, max_redirections=None)

        if response.url == url:
            if not redirection:
                logger.debug('> NO HTTP Redirection')
            if response.headers.get('Content-Type', '').startswith('text/'):
                logger.debug('> Try GeneralSimple')
                from ykdl.extractors.generalsimple import site
                site = site.get_proxy('parser_list', url)
                if site:
                    return site, url
                logger.debug('> Try GeneralEmbed')
                return import_module('ykdl.extractors.generalembed').site, url
            else:
                logger.debug('> Try SingleMultimedia')
                return singlemultimedia.site, url

        logger.info('> New url: ' + response.url)
        url = response.url
        redirection = True
logging.basicConfig(handlers=[ColorHandler()]) 15 | 16 | 17 | builtins.Infinity = float('inf') 18 | 19 | 20 | if sys.version_info > (3, 13): 21 | import warnings 22 | warnings.filterwarnings('ignore', 23 | 'invalid escape sequence', 24 | SyntaxWarning, append=True) 25 | 26 | 27 | if sys.version_info < (3, 10): 28 | import types 29 | types.NoneType = type(None) 30 | 31 | 32 | def bound_monkey_patch(orig, new): 33 | '''Monkey patch the original function with new, and bind the original 34 | function as its first argument, at end clear the new function from the 35 | module which it defined with. 36 | ''' 37 | if hasattr(orig, 'orig'): 38 | raise ValueError( 39 | 'Monkey patched function can not be patched twice, please use ' 40 | 'the attribute `orig` to get original function and patch it.') 41 | f = sys._getframe() 42 | module = f.f_globals['__name__'] 43 | co_name = f.f_code.co_name 44 | argspec = str(inspect.signature(orig)) 45 | marks = '*' * 76 46 | doc = new.__doc__ or '' 47 | doc += ''' 48 | {marks} 49 | {orig.__name__}.orig{argspec} 50 | 51 | This is a bound monkey patched function via use '{module}.{co_name}', 52 | {orig.__name__}.orig is the original. 
53 | ''' 54 | if orig.__doc__: 55 | doc += '''{marks} 56 | 57 | {orig.__doc__} 58 | ''' 59 | new.__doc__ = doc.format(**vars()) 60 | new.orig = orig 61 | new = new.__get__(orig, type(new)) # bind original as the first argument 62 | orig.__globals__[orig.__name__] = new 63 | del new.__globals__[new.__name__] 64 | 65 | 66 | if os.name == 'nt': 67 | 68 | # Re-encoding Windows cmd shell output, py35 and below 69 | 70 | if sys.version_info < (3, 6): 71 | sys.stderr = io.TextIOWrapper(sys.stderr.detach(), 72 | encoding=sys.stderr.encoding, 73 | errors='ignore', 74 | line_buffering=True) 75 | sys.stdout = io.TextIOWrapper(sys.stdout.detach(), 76 | encoding=sys.stdout.encoding, 77 | errors='ignore', 78 | line_buffering=True) 79 | 80 | 81 | # Implements as general method instead of Windows primitive delete-on-close 82 | # which would lock the temporary files 83 | 84 | class _TemporaryFileCloser: 85 | # codes were copied from tempfile._TemporaryFileCloser 86 | def close(self, unlink=os.unlink): 87 | if not self.close_called and self.file is not None: 88 | self.close_called = True 89 | try: 90 | self.file.close() 91 | finally: 92 | if self.delete: 93 | unlink(self.name) 94 | def __del__(self): 95 | self.close() 96 | 97 | def NamedTemporaryFile(orig, 98 | mode='w+b', buffering=-1, encoding=None, newline=None, 99 | suffix=None, prefix='tmp', dir=None, delete=True, 100 | *, errors=None): 101 | '''Windows delete-on-close flag will not be used, a closer is use to 102 | close the temporary file, so it can be opened as shared. 
103 | ''' 104 | kwargs = vars() 105 | del kwargs['orig'] 106 | kwargs['delete'] = False # skip setting os.O_TEMPORARY in the flags 107 | if sys.version_info < (3, 8): 108 | del kwargs['errors'] 109 | tempfile = orig(**kwargs) 110 | # at here setting whether is deleted on close 111 | tempfile._closer.delete = tempfile.delete = delete 112 | return tempfile 113 | 114 | tempfile._TemporaryFileCloser.close = _TemporaryFileCloser.close 115 | tempfile._TemporaryFileCloser.__del__ = _TemporaryFileCloser.__del__ 116 | del _TemporaryFileCloser 117 | bound_monkey_patch(tempfile.NamedTemporaryFile, NamedTemporaryFile) 118 | 119 | 120 | # Shuffles getaddrinfo() result, that helps multi-connect to servers 121 | 122 | def getaddrinfo(orig, *args, **kwargs): 123 | '''Shuffles the orig result.''' 124 | addrlist = orig(*args, **kwargs) 125 | random.shuffle(addrlist) 126 | return addrlist 127 | 128 | bound_monkey_patch(socket.getaddrinfo, getaddrinfo) 129 | 130 | 131 | #compact_dev_null = open(os.devnull, 'w') 132 | -------------------------------------------------------------------------------- /ykdl/extractors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LifeActor/ykdl/44cba0d088787a4c9aed62bba1ff84c7e4066c15/ykdl/extractors/__init__.py -------------------------------------------------------------------------------- /ykdl/extractors/_byted.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | 6 | # result length is incorrect 7 | # 8 | #js_dom = ''' 9 | #var window = this, 10 | # document = {{referrer: 'http://www.douyin.com/'}}, 11 | # location = {{href: '{url}', protocol: 'https'}}, 12 | # navigator = {{userAgent: '{ua}'}}; 13 | #''' 14 | #js_acrawler = None 15 | # 16 | #def get_acrawler_signer(url): 17 | # assert JSEngine, "No JS Interpreter found, can't load byted acrawler!" 
def generate_mstoken():
    '''Return a random 132-character token string.

    91 to 99 random bytes are Base64-encoded, '+' and '/' are both mapped
    to '9' and the padding is stripped. Short results get '==' appended,
    then single '-' or '_' characters are inserted after randomly chosen
    alphanumerics until the string is 132 characters long.
    '''
    charset = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    size = random.randrange(91, 100)
    encoded = base64.b64encode(os.urandom(size)).decode()
    token = encoded.translate(str.maketrans('+/', '99')).rstrip('=')
    if len(token) <= 128:
        token += '=='
    while len(token) < 132:
        start = random.randrange(128)
        char = random.choice(charset)
        filler = random.choice('-_')
        # Insert the filler right after the first `char` at or beyond
        # `start`; when there is none, just draw again.
        pos = token.find(char, start)
        if pos >= 0:
            token = token[:pos + 1] + filler + token[pos + 1:]
    return token
def get_response(url, *args, **kwargs):
    '''Request `url` through the original get_response, adding site cookies.

    For 'live.douyin.' URLs the cached 'nonce' cookies are used (created via
    get_nonce_cookies() on first use); for 'ixigua.' URLs the cached 'ttwid'
    cookies are used (created via get_ttwid_cookies(url) on first use).
    Any other URL is passed through without touching the headers.

    All positional and keyword arguments are forwarded unchanged, except
    that headers['Cookie'] is set when cookies apply.
    '''
    if 'live.douyin.' in url:
        # get_nonce_cookies() takes no arguments
        cookies = _cookies.get('nonce') or get_nonce_cookies()
    elif 'ixigua.' in url:
        cookies = _cookies.get('ttwid') or get_ttwid_cookies(url)
    else:
        # unknown host: nothing to inject
        cookies = None
    if cookies:
        kwargs.setdefault('headers', {})['Cookie'] = cookies
    return _get_response(url, *args, **kwargs)
8 | ''' 9 | 10 | from ..extractor import * 11 | from ..mediainfo import MediaInfo 12 | 13 | from ..util.http import * 14 | from ..util.human import * 15 | from ..util.m3u8 import * 16 | from ..util.match import * 17 | from ..util.wrap import * 18 | from ..util.kt_player import * 19 | 20 | from ..util.lazy import lazy_import 21 | lazy_import('from jsengine import JSEngine') 22 | del lazy_import 23 | 24 | import os 25 | import sys 26 | import re 27 | import json 28 | import time 29 | import base64 30 | import random 31 | import functools 32 | import urllib.parse 33 | import urllib.request 34 | 35 | from html import * 36 | from urllib.parse import * 37 | from tempfile import NamedTemporaryFile 38 | 39 | g = globals() 40 | for name in urllib.request.__all__: 41 | if name.startswith('HTTP') or name.endswith('Handler'): 42 | g[name] = urllib.request.__dict__[name] 43 | del g, name 44 | -------------------------------------------------------------------------------- /ykdl/extractors/acfun/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | def get_extractor(url): 4 | if '/bangumi/' in url: 5 | from . import bangumi as s 6 | elif '/live' in url: 7 | from . import live as s 8 | else: 9 | from . 
class AcBase(Extractor):
    '''Shared extraction logic for AcFun pages.

    Subclasses must provide get_page_info(html) returning the 4-tuple
    (title, artist, source video id, JSON text of the play info).
    '''

    # number parsed from a representation's 'qualityType' -> stream id
    quality_2_id = {
        2160: '4K',
        1080: 'BD',
         720: 'TD',
         540: 'HD',
         360: 'SD',
         270: 'LD'
    }

    def prepare(self):
        '''Fetch self.url and return a MediaInfo with one m3u8 stream for
        each representation listed in the page's play info.'''
        info = MediaInfo(self.name)

        self.mid  # scan & check
        html = get_content(self.url)
        # the source video id is part of the tuple but is not needed here
        info.title, info.artist, _, data = self.get_page_info(html)

        data = json.loads(data)['adaptationSet'][0]['representation']
        self.logger.debug('data:\n%s', data)

        # choose the main or the backup address once for all streams
        url = random.choice(['url', 'backupUrl'])
        for q in data:
            quality = int(match1(q['qualityType'], r'(\d+)'))
            stream_id = self.quality_2_id[quality]
            if q['frameRate'] > 30:
                # mark high frame rate streams, e.g. 'BD-f60'
                stream_id += '-f' + str(int(q['frameRate'] + 0.1))
            stream_profile = q['qualityLabel']
            # fall back to the main address when the chosen one is missing
            urls = q.get(url) or q['url']
            if not isinstance(urls, list):
                urls = [urls]
            info.streams[stream_id] = {
                'container': 'm3u8',
                'profile': stream_profile,
                'src': urls
            }

        return info
16 | 17 | title = '{} - {}'.format(bgmInfo['bangumiTitle'], bgmInfo['episodeName']) 18 | sourceVid = videoInfo['id'] 19 | m3u8Info = videoInfo.get('playInfos') 20 | if m3u8Info: 21 | m3u8Info = m3u8Info[0] 22 | else: 23 | m3u8Info = videoInfo.get('ksPlayJson') 24 | 25 | return title, artist, sourceVid, m3u8Info 26 | 27 | def format_mid(self, mid): 28 | if not isinstance(mid, tuple): 29 | mid = mid, None 30 | mid = mid[:2] 31 | if len(mid) == 1: 32 | mid += (None, ) 33 | bid, iid = mid 34 | assert fullmatch(bid, '(aa)?\d+') 35 | assert not iid or fullmatch(iid, '\d+_\d+') 36 | bid = match1(bid, '(\d+)') 37 | if self.url is None: 38 | if iid: 39 | self.url = 'https://www.acfun.cn/bangumi/aa{bid}_{iid}'.format(**vars()) 40 | else: 41 | self.url = 'https://www.acfun.cn/bangumi/aa{bid}'.format(**vars()) 42 | return mid 43 | 44 | def prepare_mid(self): 45 | mid = matchm(self.url, '/aa(\d+)_(\d+_\d+)', '/aa(\d+)') 46 | if mid[0]: 47 | return mid 48 | 49 | def list_only(self): 50 | bid, iid = self.mid 51 | return bid and not iid 52 | 53 | def prepare_list(self): 54 | bid, iid = self.mid 55 | html = get_content( 56 | 'https://www.acfun.cn/bangumi/aa{bid}'.format(**vars()), 57 | params={ 58 | 'pagelets': 'pagelet_partlist', 59 | 'reqID': 0, 60 | 'ajaxpipe': 1, 61 | 't': int(time.time() * 1000) 62 | }) 63 | iids = matchall(html, '{bid}_(\d+_\d+)'.format(**vars())) 64 | self.set_index(iid, iids) 65 | for iid in iids: 66 | yield 'https://www.acfun.cn/bangumi/aa{bid}_{iid}'.format(**vars()) 67 | 68 | site = AcBan() 69 | -------------------------------------------------------------------------------- /ykdl/extractors/acfun/live.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .._common import * 4 | 5 | 6 | class AcLive(Extractor): 7 | name = 'AcFun 弹幕视频网 (直播)' 8 | 9 | headers = { 10 | 'Accept': 'application/json, text/plain, */*', 11 | 'Referer': 'https://live.acfun.cn/' 12 | } 13 | 14 | @staticmethod 15 
| def profile_2_id(profile): 16 | p1, p2 = matchm(profile, '(\S+) ?(\d+M)?') 17 | id = { 18 | '蓝光': 'BD', 19 | '超清': 'TD', 20 | '高清': 'HD' 21 | }[p1] 22 | if p2: 23 | id += p2 24 | return id 25 | 26 | @staticmethod 27 | def format_mid(mid): 28 | mid = fullmatch(mid, '\d+') 29 | assert mid 30 | return mid 31 | 32 | def prepare_mid(self): 33 | return match1(self.url, '/live/(\d+)') 34 | 35 | @functools.cache 36 | def prepare_auth(self): 37 | self.mid # scan & check 38 | did = 'web_{}{}{}'.format(random.randrange(1, 10), # 9 39 | random.randrange(1, 10 ** 7), # 9999999 40 | get_random_hex(8).upper()) # FFFFFFFF 41 | self.headers['Cookie'] = {'_did': did} 42 | data = get_response( 43 | 'https://id.app.acfun.cn/rest/app/visitor/login', 44 | data=b'sid=acfun.api.visitor', 45 | headers=self.headers 46 | ).json() 47 | assert data['result'] == 0, data['error_msg'] 48 | return did, data['userId'], data['acfun.api.visitor_st'] 49 | 50 | def prepare(self): 51 | info = MediaInfo(self.name, True) 52 | 53 | did, user_id, visitor_st = self.prepare_auth() 54 | data = get_response( 55 | 'https://api.kuaishouzt.com/rest/zt/live/web/startPlay', 56 | params={ 57 | 'subBiz': 'mainApp', 58 | 'kpn': 'ACFUN_APP', 59 | 'kpf': 'PC_WEB', 60 | 'userId': user_id, 61 | 'did': did, 62 | 'acfun.api.visitor_st': visitor_st 63 | }, 64 | data={ 65 | 'authorId': self.mid, 66 | 'pullstreamType': 'FLV' 67 | }, 68 | headers=self.headers 69 | ).json() 70 | assert data['result'] == 1, data['error_msg'] 71 | data = data['data'] 72 | 73 | info.title = data['caption'] 74 | 75 | data = json.loads(data['videoPlayRes']) 76 | for stream in data['liveAdaptiveManifest'][0]['adaptationSet']['representation']: 77 | stream_profile = stream['name'] 78 | stream_id = self.profile_2_id(stream_profile) 79 | info.streams[stream_id] = { 80 | 'container': 'flv', 81 | 'profile': stream_profile, 82 | 'src' : [stream['url']], 83 | 'size': Infinity 84 | } 85 | 86 | data = get_response( 87 | 
class AcVideo(AcBase):
    '''Extractor for ordinary AcFun video pages (/v/ac<digits>).'''

    name = 'AcFun 弹幕视频网'

    def get_page_info(self, html):
        '''Parse the page JSON and return
        (title, artist, source video id, play info JSON text).

        Raises AssertionError when the page has no 'currentVideoInfo'.
        '''
        pageInfo = json.loads(match1(html, '(?:pageInfo|videoInfo) = ({.+?});'))
        videoList = pageInfo['videoList']
        videoInfo = pageInfo.get('currentVideoInfo')
        # the message must come from pageInfo, the only parsed object here
        assert videoInfo, pageInfo.get('playErrorMessage') or "can't play this video!!"

        title = pageInfo['title']
        sub_title = videoInfo['title']
        artist = pageInfo['user']['name']
        # append the part title for multi-part videos or meaningful part names
        if sub_title not in ('noTitle', 'Part1', title) or len(videoList) > 1:
            title = '{title} - {sub_title}'.format(title=title,
                                                   sub_title=sub_title)
        sourceVid = videoInfo['id']

        m3u8Info = videoInfo.get('playInfos')
        if m3u8Info:
            m3u8Info = m3u8Info[0]
        else:
            m3u8Info = videoInfo.get('ksPlayJson')

        return title, artist, sourceVid, m3u8Info

    def format_mid(self, mid):
        '''Validate `mid` ('ac123' or '123'), return its digits and rebuild
        self.url from them.'''
        assert fullmatch(mid, r'(ac)?\d+')
        mid = match1(mid, r'(\d+)')
        # force rebuild url for list index
        self.url = 'https://www.acfun.cn/v/ac{mid}'.format(mid=mid)
        return mid

    def prepare_mid(self):
        '''Extract the numeric video id from self.url.'''
        return match1(self.url, r'v/ac(\d+)', r'\bac=(\d+)')

    def prepare_list(self):
        '''Return the URLs of all video links found on the page.'''
        html = get_content(self.url)
        videos = ['https://www.acfun.cn' + path for path in
                  matchall(html, 'href=[\'"](/v/ac[0-9_]+)[\'"] title=[\'"]')]
        self.set_index(self.url, videos)
        return videos

site = AcVideo()
APPKEY = '84956560bc028eb7'
SECRETKEY = '94aba54af9065f71de72f5508f1cd42e'
api_url = 'https://bangumi.bilibili.com/player/web_api/v2/playurl'

ua_legacy = 'Mozilla/5.0 (X11; Linux x86_64; rv:50.1) Gecko/20100101 Firefox/50.1'

class BiliBan(BiliBase):
    '''Extractor for Bilibili bangumi (season / episode) pages.'''

    name = '哔哩哔哩 番剧 (Bilibili Bangumi)'

    def list_only(self):
        '''True when the URL is a season page ('/play/ss').'''
        return '/play/ss' in self.url

    def _get_initial_state(self):
        '''Fetch self.url with the legacy User-Agent and return the parsed
        __INITIAL_STATE__ JSON object embedded in the page.'''
        html = get_content(self.url, headers={'User-Agent': ua_legacy})
        return json.loads(match1(html, '__INITIAL_STATE__=({.+?});'))

    def get_page_info(self, info):
        '''Fill `info` (site, title, artist, duration) from the page data
        and set self.mid / self.seasonType for get_api_url().

        Raises AssertionError for episodes whose epStatus is 13.
        '''
        data = self._get_initial_state()

        epInfo = data['epInfo']
        assert epInfo['epStatus'] != 13, "can't play VIP video!"

        self.mid = epInfo['cid']
        mediaInfo = data['mediaInfo']
        self.seasonType = mediaInfo['ssType']
        ssTypeFormat = mediaInfo['ssTypeFormat']
        ss_name = ssTypeFormat['name']
        ss_name_e = ssTypeFormat['homeLink'].split('/')[-2].title()
        if ss_name_e != 'Anime':
            if ss_name_e == 'Tv':
                ss_name_e = 'TV'
            info.site = '哔哩哔哩 {ss_name} (Bilibili {ss_name_e})'.format(
                            ss_name=ss_name, ss_name_e=ss_name_e)

        def get_badge():
            # episodes in an extra section take the section title as badge,
            # None when no section matches; others use their own badge
            stype = epInfo['sectionType']
            if not stype:
                return epInfo['badge']
            for s in data['sections']:
                if s['type'] == stype:
                    return s['title']
            return None

        title_h1 = data['h1Title']
        title_share = epInfo['share_copy']
        # prefer the share text when it contains the page heading
        title = title_share if title_h1 in title_share else title_h1
        badge = get_badge()
        if badge != '预告':
            badge = ''
        info.title = '{title} {badge}'.format(title=title, badge=badge)
        info.artist = mediaInfo.get('upInfo', {}).get('name') or \
                      mediaInfo.get('up_info', {}).get('uname')
        # presumably milliseconds -> seconds; confirm against the API
        info.duration = epInfo['duration'] // 1000

    def get_api_url(self, qn):
        '''Return the signed play URL API address for quality number `qn`.'''
        params = {
            'appkey': APPKEY,
            'cid': self.mid,
            'module': 'bangumi',
            'platform': 'html5',
            'player': 1,
            'qn': qn,
            'season_type': self.seasonType
        }
        return sign_api_url(api_url, params, SECRETKEY)

    def prepare_list(self):
        '''Yield the episode page URLs of the season, skipping episodes
        whose epStatus is 13.'''
        data = self._get_initial_state()
        epid = data['epInfo']['id']
        # main episodes first, then the episodes of every extra section
        eplist = list(data['epList'])
        for section in data['sections']:
            eplist.extend(section['epList'])
        epids = [ep['id'] for ep in eplist if ep['epStatus'] != 13]

        skiped = len(eplist) - len(epids)
        if skiped:
            self.logger.info('skiped %d VIP videos', skiped)
        assert epids, "can't play VIP videos!"

        self.set_index(epid, epids)
        for ep_id in epids:
            yield 'https://www.bilibili.com/bangumi/play/ep{id}'.format(id=ep_id)

site = BiliBan()
'''Bilibili VID convertor, AV <=> BV.

Origin by mcft:
    https://www.zhihu.com/question/381784377/answer/1099438784

Modified by SeaHOH
'''


__all__ = ['bv2av', 'av2bv']

# base-58 alphabet and its reverse lookup
tablec = list('fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF')
tablei = {c: i for i, c in enumerate(tablec)}
# BV id template, '*' marks the positions filled with encoded digits
bvtl = list('BV1**4*1*7**')
# position (inside the last 10 characters) of each base-58 digit,
# least significant digit first
bvco = [9, 8, 1, 6, 2, 4]  # av >= 29460791296 ( 2^35 - 2^32 - 2^29 - 2^26 )
                           # ? [9, 8, 1, 6, 2, 4, 0, 7, 3, 5]
                           # ? [9, 8, 1, 6, 2, 4, 5, 7, 3, 0]
xor = 177451812
# base-58 digits (least significant first) of the additive offset
add = []
_d = 100618342136696320
while _d:
    _d, _m = divmod(_d, 58)
    add.append(_m)


def bv2av(bv):
    '''Convert a BV id to its AV number, returned as a decimal string.

    Only the last 10 characters of `bv` are read, so the 'BV' prefix is
    optional.  Raises KeyError for characters outside the alphabet.
    '''
    r = 0
    x = bv[-10:]
    for p, i in enumerate(bvco):
        r += (tablei[x[i]] - add[p]) * 58 ** p
    return str(r ^ xor)

def av2bv(av):
    '''Convert an AV number to its BV id.

    `av` may be an int or a string of digits, optionally prefixed with
    'av' in any letter case.  Raises ValueError when no number is left.
    '''
    if isinstance(av, str):
        # also accept 'AV'/'Av', the original accepted lower case only
        av = av.strip().lstrip('avAV')
    r = bvtl.copy()
    x = int(av) ^ xor
    for p, i in enumerate(bvco):
        # add the offset digit by digit, divmod carries into the next digit
        x, m = divmod(x + add[p], 58)
        r[i + 2] = tablec[m]
    return ''.join(r)
class BiliVC(Extractor):
    '''Extractor for Bilibili short video clips (vc.bilibili.com).'''

    name = '哔哩哔哩 小视频 (Bili VC)'

    def prepare_mid(self):
        '''Extract the numeric clip id from self.url.'''
        # this method was also named `prepare`, so the real prepare() below
        # replaced it and the id was never taken from the URL
        return match1(self.url, r'video/(\d+)')

    def prepare(self):
        '''Query the clip detail API for self.mid and return a MediaInfo
        holding the single mp4 stream.'''
        info = MediaInfo(self.name)

        video_data = get_response(
                'https://api.vc.bilibili.com/clip/v1/video/detail',
                params={'video_id': self.mid}).json()
        item = video_data['data']['item']

        info.title = item['description']
        info.artist = video_data['data']['user']['name']

        info.streams['current'] = {
            'container': 'mp4',
            'profile': 'current',
            'src' : [item['video_playurl']],
            'size': int(item['video_size'])
        }

        return info

site = BiliVC()
class CNTV(Extractor):
    '''Extractor for CNTV (央视网) videos.'''

    name = '央视网 (CNTV)'

    # [key in the API's 'video' object, stream id, profile]
    # presumably ordered best quality first
    supported_chapters = [
        ['chapters6', 'BD', '超高清 1080P'],
        ['chapters5', 'TD', '超高清 720P'],
        ['chapters4', 'TD', '超清'],
        ['chapters3', 'HD', '高清'],
        ['chapters2', 'SD', '标清'],
        ['lowChapters', 'LD', '流畅']]

    def prepare_mid(self):
        '''Return the video guid, taken from the URL or else from the page.'''
        mid = match1(self.url, r'(?:guid|videoCenterId)=(\w+)',
                               r'(\w+)/index\.shtml')
        if mid is None:
            html = get_content(self.url)
            mid = match1(html, r'''guid\s*=\s*['"]([^'"]+)''',
                               '"videoCenterId","([^"]+)',
                               r'''initMyAray\s*=\s*['"]([^'"]+)''')
        return mid

    def prepare(self):
        '''Query the video info API for self.mid and return a MediaInfo
        with one mp4 stream per available stream id.'''
        info = MediaInfo(self.name)

        data = get_response(
                'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do',
                params={
                    'pid': self.mid,
                    'tsp': int(time.time()),
                    'vn' : 2054,
                    'pcv': 152438790
                }).json()

        info.title = '{} - {}'.format(data['title'], data['play_channel'])

        video_data = data['video']
        seen_ids = set()
        for chapters, stream_id, stream_profile in self.supported_chapters:
            stream_data = video_data.get(chapters)
            # two table rows share the id 'TD'; keep the first available one
            # instead of letting the later row overwrite it
            if not stream_data or stream_id in seen_ids:
                continue
            seen_ids.add(stream_id)
            info.streams[stream_id] = {
                'container': 'mp4',
                'profile': stream_profile,
                'src': [v['url'] for v in stream_data]
            }
        return info

site = CNTV()
import movie as s 11 | return s.site, url 12 | 13 | raise NotImplementedError(url) 14 | -------------------------------------------------------------------------------- /ykdl/extractors/douban/movie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .._common import * 4 | 5 | 6 | class DoubanMovie(Extractor): 7 | name = 'Douban movie (豆瓣电影)' 8 | 9 | def prepare(self): 10 | info = MediaInfo(self.name) 11 | html = get_content(self.url) 12 | info.title = match1(html, '') 14 | url = match1(html,'"embedUrl": "(.+?)"') 15 | 16 | info.streams['current'] = { 17 | 'container': 'mp4', 18 | 'profile': 'current', 19 | 'src': [url] 20 | } 21 | return info 22 | 23 | def list_only(self): 24 | return '/subject/' in self.url 25 | 26 | def prepare_list(self): 27 | html = get_content(self.url) 28 | return matchall(html, '')[-1] 37 | data = json.loads(data) 38 | self.logger.debug('data: \n%s', data) 39 | data = json.loads(match1(data, '(\[.+\])'))[-1] 40 | 41 | try: 42 | video_info = data['state']['roomStore']['roomInfo'].get('room') 43 | except KeyError: 44 | video_info = data['/webcast/reflow/:id'].get('room') 45 | 46 | assert video_info and video_info['status'] == 2, 'live is off!!!' 
class Douyin(Extractor):
    '''Extractor for Douyin videos, notes and music items.'''

    name = '抖音 (Douyin)'

    def prepare_mid(self):
        '''Extract the numeric item id from self.url.'''
        return match1(self.url, r'\b(?:video/|music/|note/|vid=|aweme_id=|item_ids=)(\d+)')

    def prepare(self):
        '''Query the web detail API for self.mid and return a MediaInfo
        with a single 'current' stream.

        Raises AssertionError when the API reports an error or returns no
        'aweme_detail'.
        '''
        info = MediaInfo(self.name)

        ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
        params = {
            'aweme_id': self.mid,
            'aid': 6383,
            'version_name': '23.5.0',
            'device_platform': 'webapp',
            'os_version': 10
        }
        # the signature is computed over the query string and the same
        # User-Agent that is sent with the request
        params['X-Bogus'] = sign_xbogus(urlencode(params), ua)
        data = get_response('https://www.douyin.com/aweme/v1/web/aweme/detail/',
                            params=params,
                            headers={
                                'User-Agent': ua,
                                'Cookie': {'msToken': generate_mstoken()},
                                'Referer': 'https://www.douyin.com/'
                            }).json()
        assert data['status_code'] == 0, data['status_msg']
        assert data['aweme_detail'], data['filter_detail']

        data = data['aweme_detail']
        aweme_type = data['aweme_type']
        # TikTok [0, 51, 55, 58, 61, 150]
        if aweme_type not in [2, 68, 150, 0, 4, 51, 55, 58, 61]:
            # report through the extractor logger, not stdout
            self.logger.warning('new type %s', aweme_type)
        music_image = aweme_type in [2, 68, 150]  # video [0, 4, 51, 55, 58, 61]
        title = data['desc']
        nickName = data['author'].get('nickname', '')
        uid = data['author'].get('unique_id') or \
              data['author']['short_id']

        info.title = '{title} - {nickName}(@{uid})'.format(
                        title=title, nickName=nickName, uid=uid)
        info.artist = nickName
        # presumably milliseconds -> seconds; confirm against the API
        info.duration = data['duration'] // 1000

        ext = 'mp4'
        url = data['video']['play_addr']['url_list'][0] \
                  .replace('playwm', 'play')
        if music_image or 'music' in url:
            ext = 'mp3'
            # source becomes a (cover image, audio) pair
            url = data['video']['cover']['url_list'][0], url
        info.streams['current'] = {
            'container': ext,
            'profile': data['video']['ratio'].upper(),
            'src': [url]
        }
        return info

site = Douyin()
class Douyutv(Extractor):
    '''Extractor for Douyu live rooms.'''

    name = '斗鱼直播 (DouyuTV)'

    # Display name of a quality level -> stream id used in MediaInfo.
    profile_2_id = {
        '原画': 'OG',
        '蓝光10M': 'BD10M',
        '蓝光8M': 'BD8M',
        '蓝光4M': 'BD4M',
        '蓝光': 'BD',
        '超清': 'TD',
        '高清': 'HD',
        '流畅': 'SD'
    }

    def prepare_mid(self):
        '''Return the room id scraped from the room page.

        Raises AssertionError when the page says the room has been closed.
        '''
        html = get_content(self.url)
        # Raw strings: the patterns contain regex escapes (\$, \s, \d) that
        # are invalid escape sequences in ordinary string literals.
        mid = match1(html, r'\$ROOM\.room_id\s*=\s*(\d+)',
                           r'room_id\s*=\s*(\d+)',
                           r'"room_id.?":(\d+)',
                           r'data-onlineid=(\d+)',
                           '(房间已被关闭)')
        assert mid != '房间已被关闭', '房间已被关闭'
        return mid

    def prepare(self):
        '''Collect title, artist and every available quality of the room.

        Returns the populated MediaInfo. Errors reported by the play API
        are only written to the debug log.
        '''
        info = MediaInfo(self.name, True)

        add_header('Referer', 'https://www.douyu.com')
        html = get_content(self.url)

        title = match1(html, r'Title-head\w*">([^<]+)<')
        artist = match1(html, r'Title-anchorName\w*" title="([^"]+)"')
        if not title or not artist:
            # Page layout did not match, fall back to the open room API.
            room_data = get_response(
                    'https://open.douyucdn.cn/api/RoomApi/room/' + self.mid
                    ).json()
            if room_data['error'] == 0:
                room_data = room_data['data']
                title = room_data['room_name']
                artist = room_data['owner_name']

        info.title = '{title} - {artist}'.format(title=title, artist=artist)
        info.artist = artist

        js_enc = get_h5enc(html, self.mid)
        params = {
            'cdn': '',
            'iar': 0,
            'ive': 0,
        }
        # Fills the signing fields into params in place.
        ub98484234(js_enc, self.mid, self.logger, params)

        def get_live_info(rate=0):
            '''Fetch one quality level; rate 0 also walks all other levels.

            Returns a joined error message string, or None on success.
            '''
            params['rate'] = rate
            live_data = get_response(
                    'https://www.douyu.com/lapi/live/getH5Play/' + self.mid,
                    data=params).json()
            if live_data['error']:
                return live_data['msg']

            live_data = live_data['data']
            real_url = '/'.join([live_data['rtmp_url'], live_data['rtmp_live']])
            rate_2_profile = {item['rate']: item['name']
                              for item in live_data['multirates']}
            stream_profile = rate_2_profile[live_data['rate']]
            if '原画' in stream_profile:
                stream_id = 'OG'
            else:
                stream_id = self.profile_2_id[stream_profile]
            info.streams[stream_id] = {
                'container': match1(live_data['rtmp_live'], r'\.(\w+)\?'),
                'profile': stream_profile,
                'src' : [real_url],
                'size': Infinity
            }

            error_msges = []
            if rate == 0:
                # The default request already produced one level; ask for
                # each remaining one explicitly.
                rate_2_profile.pop(0, None)
                rate_2_profile.pop(live_data['rate'], None)
                for other_rate in rate_2_profile:
                    error_msg = get_live_info(other_rate)
                    if error_msg:
                        error_msges.append(error_msg)
            if error_msges:
                return ', '.join(error_msges)

        error_msg = get_live_info()
        if error_msg:
            self.logger.debug('error_msg:\n\t%s', error_msg)

        return info

    def prepare_list(self):
        '''Return the room ids listed on a multi-room page.'''
        html = get_content(self.url)
        return matchall(html, 'class="hroom_id" value="([^"]+)',
                              'data-room_id="([^"]+)')
6 | 7 | 8 | # REF: https://cdnjs.com/libraries/crypto-js 9 | js_md5 = get_pkgdata_str(__name__, 'crypto-js-md5.min.js', 10 | 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.9-1/crypto-js.min.js') 11 | 12 | def get_h5enc(html, rid): 13 | js_enc = match1(html, '(var vdwdae325w_64we =[\s\S]+?)\s*') 14 | if js_enc is None or 'ub98484234(' not in js_enc: 15 | data = get_response('https://www.douyu.com/swf_api/homeH5Enc', 16 | params={'rids': rid}).json() 17 | assert data['error'] == 0, data['msg'] 18 | js_enc = data['data']['room' + rid] 19 | return js_enc 20 | 21 | def ub98484234(js_enc, rid, logger, params): 22 | names_dict = { 23 | 'debugMessages': get_random_name(8), 24 | 'decryptedCodes': get_random_name(8), 25 | 'patchCode': get_random_name(8), 26 | 'resoult': get_random_name(8), 27 | '_ub98484234': get_random_name(8), 28 | 'workflow': match1(js_enc, 'function ub98484234\(.+?\Weval\((\w+)\);'), 29 | } 30 | js_dom = ''' 31 | {debugMessages} = {{{decryptedCodes}: []}}; 32 | if (!this.window) {{window = {{}};}} 33 | if (!this.document) {{document = {{}};}} 34 | '''.format(**names_dict) 35 | js_patch = [''' 36 | function {patchCode}(workflow) {{ 37 | let testVari = /(\w+)=(\w+)\([\w\+]+\);.*?(\w+)="\w+";/.exec(workflow); 38 | if (testVari && testVari[1] == testVari[2]) {{ 39 | workflow += `${{testVari[1]}}[${{testVari[3]}}] = function() {{return true;}};`; 40 | }} 41 | let subWorkflow = /(?:\w+=)?eval\((\w+)\)/.exec(workflow); 42 | if (subWorkflow) {{ 43 | let subPatch = ` 44 | {debugMessages}.{decryptedCodes}.push('sub workflow: ' + subWorkflow); 45 | subWorkflow = {patchCode}(subWorkflow); 46 | `.replace(/subWorkflow/g, subWorkflow[1]) + subWorkflow[0]; 47 | workflow = workflow.replace(subWorkflow[0], subPatch); 48 | }} 49 | return workflow; 50 | }} 51 | '''.format(**names_dict), ''' 52 | {debugMessages}.{decryptedCodes}.push({workflow}); 53 | eval({patchCode}({workflow})); 54 | '''.format(**names_dict)] 55 | js_debug = ''' 56 | var {_ub98484234} = ub98484234; 
class DouyutvVideo(Extractor):
    '''Extractor for recorded videos on v.douyu.com.'''

    name = '斗鱼视频 (DouyuTV)'

    # API quality key -> stream id used in MediaInfo.
    profile_2_id = {
        'super': 'OG',  # Need Login
        'high': 'TD',
        'normal': 'HD'
    }

    def prepare_mid(self):
        '''Return the show id taken from the URL.'''
        return match1(self.url, 'show/(\w+)')

    def prepare(self):
        '''Sign a stream request for the video and collect its m3u8 streams.

        Raises AssertionError when the page has no point id or the stream
        API answers with an error.
        '''
        info = MediaInfo(self.name)

        # Always work on the desktop page, also when started from a bare id.
        needs_desktop_url = self.url is None or 'vmobile' in self.url
        if needs_desktop_url:
            self.url = 'https://v.douyu.com/show/' + self.mid

        page = get_content(self.url)
        info.title = match1(page, 'title>(.+?)-斗鱼视频<')
        point_id = match1(page, '"point_id":\s?(\d+)')
        assert point_id, "can't find video!!!"

        request = {'vid': self.mid}
        # Adds the signing fields (including 'did') to request in place.
        ub98484234(get_h5enc(page, point_id), point_id, self.logger, request)

        add_header('Referer', self.url)
        cookie = {'Cookie': 'dy_did=' + request['did']}
        reply = get_response('https://v.douyu.com/api/stream/getStreamUrl',
                             cookie,
                             data=request).json()
        assert reply['error'] == 0, reply

        thumb_videos = reply['data']['thumb_video']
        for profile_key, stream_data in thumb_videos.items():
            if stream_data:
                stream_id = self.profile_2_id[profile_key]
                info.streams[stream_id] = {
                    'container': 'm3u8',
                    'profile': profile_key,
                    'src': [stream_data['url']]
                }

        return info
class GeneralEmbed(EmbedExtractor):
    '''Find videos of known sites that are embedded in an arbitrary page.'''

    name = 'GeneralEmbed (通用嵌入视频)'

    def prepare_playlist(self):
        '''Scan the page with every embed pattern list, in a fixed order,
        and register each distinct hit in self.media_info_list.'''

        def register(site, mid):
            # Skip entries that were already collected.
            candidate = self.new_media_info({
                    'site': site,
                    'mid': mid
                })
            if candidate in self.media_info_list:
                return
            self.media_info_list.append(candidate)

        page = get_content(self.url)

        def scan(site, patterns, prefix=None):
            # Register every match of the patterns for the given site,
            # optionally prepending a fixed string to the match.
            for found in matchall(page, *patterns):
                register(site, found if prefix is None else prefix + found)

        scan('youku', youku_embed_patterns)
        scan('qq.video', qq_embed_patterns)
        scan('sohu.my', sohu_embed_patterns)

        # ku6vms players need one more round trip to resolve the real id.
        for player_url in matchall(page, *ku6_embed_url):
            player_page = get_content(player_url)
            flashvars = matchall(player_page, 'vid=([^&]+)',
                                              'style=([^&]+)',
                                              'sn=([^&]+)')
            api_url = ('http://v.ku6vms.com/phpvms/player/forplayer/vid/'
                       '{}/style/{}/sn/{}').format(*flashvars)
            register('ku6', get_response(api_url).json()['ku6vid'])

        scan('ku6', ku6_embed_patterns)
        scan('netease.video', netease_embed_patterns)
        scan('iqiyi', iqiyi_embed_patterns)
        scan('le.letvcloud', lecloud_embed_patterns)
        scan('ifeng.news', ifeng_embed_patterns)
        scan('weibo', weibo_embed_patterns, 'http://weibo.com/p/')
        scan('sina.video', sina_embed_patterns)
        scan('bilibili.video', bilibili_embed_patterns)

    parser = EmbedExtractor.parser_list
29 | (?: 30 | type=["']((?:video|audio|application)/[^"']+)["'] 31 | | 32 | [^>](?!type)*> 33 | ) 34 | ''' 35 | 36 | class GeneralSimple(Extractor): 37 | name = 'GeneralSimple (通用简单)' 38 | 39 | def list_only(self): 40 | return True 41 | 42 | def prepare_list(self): 43 | html = get_content(self.url) 44 | title = match1(html, '(.+?)') 47 | _title, domain = matchm(title, '(.+) [-|*] (.+)$') 48 | if domain and domain.lower() in self.url.split('/')[2]: 49 | title = _title 50 | 51 | streams = get_kt_playlist(html) 52 | if streams: 53 | info = MediaInfo(self.name) 54 | info.title = title 55 | info.streams = streams 56 | info.extra.referer = self.url 57 | yield info 58 | return 59 | 60 | for i in range(2): 61 | urls = matchall(html, pattern_src) 62 | if urls: 63 | urls = [(i and url or unescape(url), ctype, None) for url, ctype in urls] 64 | break 65 | urls = matchall(html, pattern_ext) 66 | if urls: 67 | urls = [(i and url or unescape(url), None, ext) for url, ext in urls] 68 | break 69 | if i == 0: 70 | html = unquote(unescape(html)) 71 | 72 | urls = set(urls) 73 | self.set_index(0, len(urls)) 74 | for i, (url, ctype, ext) in enumerate(urls): 75 | info = MediaInfo(self.name) 76 | info.title = len(urls) == 1 and title or f'{title}_{i+1}' 77 | url = literalize(url, True) 78 | url = match1(url, '.+(https?://.+)') or url # redirect clear 79 | if url[:2] == '//': 80 | url = self.url[:self.url.find('/')] + url 81 | elif url[0] == '/': 82 | url = self.url[:self.url.find('/', 9)] + url 83 | if '?' 
not in url and '&' in url: 84 | url = url.replace('&', '?', 1) 85 | if ext is None or ctype: 86 | ctype = str(ctype).lower() 87 | ext = contentTypes.get(ctype) or url_info(url)[1] or ( 88 | ctype.startswith('audio') and 'mp3' or 'mp4') 89 | if ext[:3] == 'm3u': 90 | info.streams = load_m3u8_playlist(url, headers={'Referer': self.url}) 91 | else: 92 | info.streams['current'] = { 93 | 'container': ext, 94 | 'profile': 'current', 95 | 'src': [url], 96 | 'size': 0 97 | } 98 | info.extra.referer = self.url 99 | yield info 100 | 101 | site = GeneralSimple() 102 | -------------------------------------------------------------------------------- /ykdl/extractors/heibaizhibo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | 6 | assert JSEngine, "No JS Interpreter found, can't extract heibaizhibo!" 7 | 8 | js_m = get_pkgdata_str(__name__, 'heibaizhibo.m.js', 9 | 'https://pichb2.huoxinglaike.com/nuxt/static/m.js') 10 | 11 | class Heibai(Extractor): 12 | name = '黑白直播' 13 | 14 | def prepare(self): 15 | info = MediaInfo(self.name, True) 16 | 17 | js_ctx = JSEngine('if (!this.window) {window = {};}') 18 | js_ctx.append(js_m) 19 | 20 | vid = match1(self.url, '/live/.*?(\d+)') 21 | if vid is None: 22 | html = get_content(self.url) 23 | js_data = match1(html, 'window.__NUXT__=(.+?)') 24 | data = js_ctx.eval(js_data) 25 | self.logger.debug('data:\n%s', data) 26 | data = data['data'][0] 27 | data = data.get('videoInfo', data) 28 | else: 29 | data = get_response('https://www.heibaizhibo.com/api/index/live', 30 | params={'id': vid}).json() 31 | msg = data['message'] 32 | assert '成功' in msg, msg 33 | data = data['data']['detail'] 34 | 35 | try: 36 | qllist = data['hd'] 37 | except KeyError: 38 | # anchor 39 | qllist = data['hdlist'] 40 | title = data['anchorInfo']['title'] 41 | artist = data['anchorInfo']['nickname'] 42 | else: 43 | title = '[{}] {}({})'.format( 44 | data['eventName'], 
data['homeName'], data['awayName']) 45 | assert data['playCode'], 'live video is offline!' 46 | data = data['playCode'][0] 47 | artist = data['gtvDesc'] or data['name'] 48 | 49 | info.title = '{title} - {artist}'.format(**vars()) 50 | info.artist = artist 51 | params = { 52 | 'gtvId': data.get('gtvId'), 53 | 'id': data.get('id', 0), 54 | 'type': 3, 55 | 'source': 2, 56 | 'liveType': 3, # 1: rtmp, 2: m3u8, 3: flv 57 | } 58 | if not params['gtvId']: 59 | del params['gtvId'] 60 | 61 | for ql in qllist: 62 | params['defi'] = ql['defi'] 63 | data_live = get_response( 64 | 'https://sig.heibaizhibo.com/signal-front/live/matchLiveInfo?', 65 | params=params).json() 66 | msg = data_live['msg'] 67 | assert '成功' in msg, msg 68 | data_live = data_live['data'][0] 69 | assert data_live['score'] >= 0, 'live video is offline!' 70 | url = js_ctx.call('vp', data_live['liveUrl']) 71 | stream_id = ql['defi'].upper() 72 | info.streams[stream_id] = { 73 | 'container': 'flv', 74 | 'profile': ql['name'], 75 | 'src' : [url], 76 | 'size': Infinity 77 | } 78 | break # seems the same quality? 79 | 80 | return info 81 | 82 | site = Heibai() 83 | -------------------------------------------------------------------------------- /ykdl/extractors/huajiao/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def get_extractor(url): 5 | if 'com/v' in url: 6 | from . import video as s 7 | else: 8 | from . 
class Huajiao(Extractor):
    '''Extractor for Huajiao live rooms and their replays.'''

    name = 'huajiao (花椒直播)'

    def prepare_mid(self):
        '''Return the stream serial number ("sn") scraped from the page.'''
        html = get_content(self.url)
        return match1(html, '"sn":"([^"]+)')

    def prepare(self):
        '''Build the MediaInfo: a replay playlist when the page carries one,
        otherwise the current live FLV stream.'''
        info = MediaInfo(self.name, True)
        html = get_content(self.url)
        # The keywords meta tag is "<title>,<artist>,...".
        keywords = match1(html, '"keywords" content="([^"]+)').split(',')
        info.title = keywords[0]
        info.artist = keywords[1]

        # Capture the complete JSON string literal, closing quote included;
        # without it json.loads() rejects the value as unterminated.
        replay_url = match1(html, r'"m3u8":\s?("[^"]+")')
        if replay_url and len(replay_url) > 2:
            replay_url = json.loads(replay_url)
            info.live = False
            info.streams = load_m3u8_playlist(replay_url)
            return info

        channel = match1(html, '"channel":"([^"]+)')
        encoded_json = get_response('http://g2.live.360.cn/liveplay',
                            params={
                                'stype': 'flv',
                                'channel': channel,
                                'bid': 'huajiao',
                                'sn': self.mid,
                                'sid': get_random_uuid_hex('SID'),
                                '_rate': 'xd',
                                'ts': time.time(),
                                'r': random.random(),
                                '_ostype': 'flash',
                                '_delay': 0,
                                '_sign': 'null',
                                '_ver': 13
                            }).content
        # Bytes 3..5 of the reply are dropped before base64 decoding.
        decoded_json = unb64(encoded_json[0:3] + encoded_json[6:])
        self.logger.debug('decoded_json:\n%s', decoded_json)
        data = json.loads(decoded_json)
        info.live = True
        info.streams['current'] = {
            'container': 'flv',
            'profile': 'current',
            'src' : [data['main']],
            'size': Infinity
        }
        return info
class HuajiaoVideo(Extractor):
    '''Extractor for Huajiao short videos and their related lists.'''

    name = 'huajiao video (花椒小视频)'

    def get_data(self, type):
        '''Return the decoded JSON assigned to ``_DATA.<type>`` in the page.'''
        html = get_content(self.url)
        # Raw string: '\]' is not a valid escape in a plain literal. The
        # doubled braces are str.format() escapes for a literal '}'.
        pattern = r'_DATA.{type} = (.+?[}}\]]);'.format(type=type)
        data = match1(html, pattern)
        self.logger.debug('%s data:\n%s', type, data)
        return json.loads(data)

    def generate_info(self, data):
        '''Turn one feed dict into a MediaInfo with a single mp4 stream.'''
        info = MediaInfo(self.name)
        info.artist = data['user_name']
        info.title = data['video_name']
        info.duration = data.get('duration')
        info.streams['current'] = {
            'container': 'mp4',
            'profile': 'current',
            'src': [data['video_url']]
        }
        return info

    def prepare_feed(self):
        '''Return the page's own feed dict, with the author's nickname
        copied into it as 'user_name'.'''
        data = self.get_data('feed')
        feed = data['feed']
        feed['user_name'] = data['author']['nickname']
        return feed

    def prepare(self):
        '''Return the MediaInfo of the video shown on the page.'''
        return self.generate_info(self.prepare_feed())

    def prepare_list(self):
        '''Yield a MediaInfo for every video of the page's list, putting the
        page's own video first when the list does not contain it.'''
        current = self.prepare_feed()
        infos = self.get_data('list')

        vid = current['vid']
        vids = [item['vid'] for item in infos]
        if vid not in vids:
            vids.insert(0, vid)
            infos.insert(0, current)
        self.set_index(vid, vids)

        for item in infos:
            yield self.generate_info(item)
class HuyaLive(Extractor):
    '''Extractor for Huya live rooms.'''

    name = 'Huya Live (虎牙直播)'

    def profile_2_id(self, profile):
        '''Derive a stream id from a quality display name.

        A resolution tag such as "4K" is used directly when present;
        otherwise the name is mapped to BD/TD/HD/SD. A bitrate tag such as
        "8M" and an "-HDR" marker are appended when the name contains them.
        Raises KeyError for a name that matches none of the known levels.
        '''
        id = match1(profile, '(\d+K)')
        br = match1(profile, '(\d+M)')
        if id is None:
            if profile.startswith(('蓝光', 'HDR')):
                id = 'BD'
            else:
                id = {
                    '超清': 'TD',
                    '高清': 'HD',
                    '流畅': 'SD'
                }[profile]
        if br:
            id += br
        if 'HDR' in profile:
            id += '-HDR'
        return id

    def prepare(self):
        '''Parse the room page, pick one CDN and sign an FLV URL for every
        quality level listed in vMultiStreamInfo.

        Raises AssertionError when the page has no stream data or the
        quality list is empty.
        '''
        info = MediaInfo(self.name, True)

        html = get_content(self.url)

        data = match1(html, 'stream: ({.+)\n.*?};')
        assert data, "can't found video!!"
        self.logger.debug('data:\n%s', data)
        data = json.loads(data)
        assert data['vMultiStreamInfo'], 'live video is offline'

        room_info = data['data'][0]['gameLiveInfo']
        info.title = '{}「{} - {}」'.format(
            room_info['roomName'], room_info['nick'], room_info['introduction'])
        info.artist = room_info['nick']
        liveSourceType = room_info['liveSourceType']

        # Take entries in random order until one has a non-empty FLV URL.
        # NOTE(review): if no entry has one, the code continues with the
        # last popped entry and an empty sUrl; an empty list leaves
        # stream_info unbound — confirm whether that can happen.
        stream_info_list = data['data'][0]['gameStreamInfoList']
        random.shuffle(stream_info_list)
        random.shuffle(stream_info_list)
        while stream_info_list:
            stream_info = stream_info_list.pop()
            sUrl = stream_info['sFlvUrl']
            if sUrl:
                info.add_comment(stream_info['sCdnType'])
                break
        sStreamName = stream_info['sStreamName']
        sUrlSuffix = stream_info['sFlvUrlSuffix']
        # format(**vars()) reads the local names above; keep them in sync.
        _url = '{sUrl}/{sStreamName}.{sUrlSuffix}?'.format(**vars())

        # The anti-code is an HTML-escaped query string.
        params = dict(parse_qsl(unescape(stream_info['sFlvAntiCode'])))
        params.setdefault('t', '100')  # 102
        # wsTime is hexadecimal; ct is that value plus a random fraction,
        # scaled by 1000.
        ct = int((int(params['wsTime'], 16) + random.random()) * 1000)
        lPresenterUid = stream_info['lPresenterUid']
        if liveSourceType and not sStreamName.startswith(str(lPresenterUid)):
            uid = int(lPresenterUid)
        else:
            uid = int(ct % 1e10 * 1e3 % 0xffffffff)
        u1 = uid & 0xffffffff00000000
        u2 = uid & 0xffffffff
        u3 = uid & 0xffffff
        # Shifts bind tighter than '|': u1 | (u2 >> 24) | (u3 << 8).
        u = u1 | u2 >> 24 | u3 << 8
        params.update({
            'ctype': 'huya_live',  # !!!!
            'u': str(u),
            'seqid': str(ct + uid),
            'ver': '1',
        })
        # fm is base64; only the part before the first '_' is used.
        fm = unb64(params['fm']).split('_', 1)[0]
        ss = hash.md5('|'.join([params['seqid'], params['ctype'], params['t']]))

        for si in data['vMultiStreamInfo']:
            stream_profile = si['sDisplayName']
            stream_id = self.profile_2_id(stream_profile)
            rate = si['iBitRate']
            # A falsy bitrate means no 'ratio' parameter is sent; remove
            # any value left over from the previous level.
            if rate:
                params['ratio'] = rate
            else:
                params.pop('ratio', None)
            params['wsSecret'] = hash.md5('_'.join(
                [fm, params['u'], sStreamName, ss, params['wsTime']]))
            url = _url + urlencode(params, safe=',*')
            info.streams[stream_id] = {
                'container': 'flv',
                'profile': stream_profile,
                'src': [url],
                'size': Infinity
            }
        # Updates the shared fake_headers mapping in place and attaches it
        # to the result.
        fake_headers.update({
            'Accept': '*/*',
            'Origin': 'https://www.huya.com',
            'Referer': 'https://www.huya.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
        })
        info.extra['header'] = fake_headers
        return info
'play/(\d+)') 18 | if mid is None: 19 | html = get_content(self.url) 20 | mid = match1(html, 'vid = (\d+)', 'data-vid="(\d+)') 21 | return mid 22 | 23 | def prepare(self): 24 | info = MediaInfo(self.name) 25 | 26 | html = get_content(self.url) 27 | info.title = match1(html, '

(.+?)

') 28 | info.artist = match1(html, '
[\s\S]+?

(.+?)

') 29 | 30 | t1 = int(time.time() * 1000) 31 | t2 = t1 + random.randrange(5, 10) 32 | rnd = str(random.random()).replace('.', '') 33 | data = get_response('https://v-api-player-ssl.huya.com/', 34 | params={ 35 | 'callback': 'jQuery1124{rnd}_{t1}'.format(**vars()), 36 | 'r': 'vhuyaplay/video', 37 | 'vid': self.mid, 38 | 'format': 'mp4,m3u8', 39 | '_': t2 40 | }).json() 41 | assert data['code'] == 1, data['message'] 42 | data = data['result']['items'] 43 | 44 | for stream_date in data: 45 | ext = stream_date['format'] 46 | quality =stream_date['definition'] 47 | stream_id, stream_profile = self.quality_2_id_profile[quality] 48 | stream_id += '-' + ext 49 | url = stream_date['transcode']['urls'][0] 50 | info.streams[stream_id] = { 51 | 'container': ext, 52 | 'profile': stream_profile, 53 | 'src' : [url], 54 | 'size': int(stream_date['size']) 55 | } 56 | 57 | return info 58 | 59 | site = HuyaVideo() 60 | -------------------------------------------------------------------------------- /ykdl/extractors/ifeng/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def get_extractor(url): 5 | if 'video' in url: 6 | from . import video as s 7 | elif 'gongkaike' in url: 8 | from . import gongkaike as s 9 | else: 10 | from . 
class IfengOpenC(Ifeng):
    """Extractor for ifeng open-course pages, derived from the news extractor."""

    name = '凤凰公开课 (ifeng open course)'  # 404

    def prepare(self):
        """Return a MediaInfo holding one 'current' mp4 stream.

        The video-info XML addressed by ``self.mid`` is downloaded as text;
        the title and every ``playurl`` attribute are then pulled out of it
        with regular expressions.
        """
        info = MediaInfo(self.name)

        mid = self.mid
        xml_url = 'http://vxml.ifengimg.com/video_info_new/{}/{}/{}.xml'.format(
            mid[-2], mid[-2:], mid)
        xml = get_content(xml_url)

        info.title = match1(xml, 'SE_Title="([^"]+)')

        # The first 7 characters of each play URL are dropped (presumably the
        # 'http://' scheme - confirm) and the rest is re-rooted on
        # ips.ifeng.com.
        sources = []
        for play_url in matchall(xml, 'playurl="([^"]+)'):
            sources.append('http://ips.ifeng.com/' + play_url[7:])

        info.streams['current'] = {
            'container': 'mp4',
            'profile': 'current',
            'src': sources
        }
        return info
class IfengVideo(Extractor):
    """Extractor for ifeng video pages backed by the tv.ifeng.com JSON API."""

    name = '凤凰视频 (ifeng video)'  # Expired

    def prepare_mid(self):
        """Return the media id sliced out of the page URL.

        This method used to be named ``prepare`` as well, so the second
        ``prepare`` definition replaced it and the slice was never used.
        The other extractors in this package provide the id through
        ``prepare_mid``; presumably the base class fills ``self.mid`` from
        it - confirm against ``Extractor``.
        """
        # Characters -13..-7 of the URL; presumably the id that precedes a
        # six-character suffix such as '.shtml' - confirm.
        return self.url[-13: -6]

    def prepare(self):
        """Query the HTML5 JSON endpoint and return the populated MediaInfo.

        A stream is added for each of the ``bqSrc`` (SD) and ``gqSrc`` (HD)
        keys present in the response.
        """
        info = MediaInfo(self.name)

        info.title = self.name + '-' + self.mid
        data = get_response(
                'http://tv.ifeng.com/html5/{self.mid}/video.json'
                .format(**vars())).json()

        # (response key, stream id, profile label), lowest quality first.
        for key, stream_id, profile in (('bqSrc', 'SD', '标清'),
                                        ('gqSrc', 'HD', '高清')):
            if key in data:
                info.streams[stream_id] = {
                    'container': 'mp4',
                    'profile': profile,
                    'src': [data[key]]
                }
        return info
01080031010000000000 45 | # 01080031010010000000 46 | # 03020031010000000000 47 | # 03020031010010000000 48 | # 03030031010000000000 49 | # 03030031010010000000 50 | # 02020031010000000000 51 | # 02020031010010000000 52 | #if len(s) < 6: 53 | # return '0' 54 | #return md5(s + 'h2l6suw16pbtikmotf0j79cej4n8uw13') 55 | # out of date 56 | 57 | init_jsengine() 58 | return js_ctx.call('cmd5x_exports.cmd5x', s) 59 | 60 | def cmd5x_iqiyi3(s): 61 | # used for live 62 | init_jsengine() 63 | return js_ctx.call('cmd5x', s) 64 | -------------------------------------------------------------------------------- /ykdl/extractors/ixigua.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | from . import _byted 5 | 6 | 7 | class IXiGua(Extractor): 8 | name = '西瓜视频 (IXiGua)' 9 | 10 | @staticmethod 11 | def profile_2_id(profile): 12 | if profile[-1] == 'p': 13 | return { 14 | '1080p': 'BD', 15 | '720p': 'TD', 16 | '480p': 'HD', 17 | '360p': 'SD', 18 | }[profile] 19 | if profile[-1] == 'k': 20 | return profile.upper() 21 | assert 0, 'unsupported profile: %r' % profile 22 | 23 | def prepare(self): 24 | info = MediaInfo(self.name) 25 | 26 | html = _byted.get_content(self.url) 27 | data = match1(html, 'window._SSR_HYDRATED_DATA=(.+?)') 28 | self.logger.debug('data: \n%s', data) 29 | data = json.loads(data.replace('undefined', 'null')) 30 | 31 | video_info = data['anyVideo']['gidInformation']['packerData'] 32 | 33 | if 'video' in video_info: 34 | video_info = video_info['video'] 35 | info.title = video_info['title'] 36 | info.artist = video_info['user_info']['name'] 37 | else: 38 | albumInfo = video_info['albumInfo'] 39 | al_title = albumInfo['title'] 40 | info.artist = albumInfo['userInfo']['name'] 41 | for c in (*albumInfo['areaList'], 42 | albumInfo['year'], 43 | *albumInfo['tagList'], 44 | *[a['name'] for a in albumInfo.get('actorList', [])]): 45 | info.add_comment(c) 46 | ep_title = 
video_info['episodeInfo']['title'] 47 | if al_title in ep_title: 48 | info.title = ep_title 49 | else: 50 | info.title = '{al_title} - {ep_title}'.format(**vars()) 51 | 52 | videoResource = video_info['videoResource']['normal'] 53 | info.duration = videoResource['video_duration'] 54 | 55 | for v in videoResource['video_list'].values(): 56 | stream_profile = v['definition'] 57 | stream_id = self.profile_2_id(stream_profile) 58 | info.streams[stream_id] = { 59 | 'container': v['vtype'], 60 | 'profile': stream_profile, 61 | 'src' : [unb64(v['backup_url_1'])], # main_url status 403 62 | 'size': v['size'] 63 | } 64 | 65 | return info 66 | 67 | def prepare_list(self): 68 | albumId, episodeId = matchall(self.url, '.ixigua.com/(\d+)(?:.+?id=(\d+))?')[0] 69 | data = get_response('https://www.ixigua.com/api/albumv2/details', 70 | headers={'Referer': 'https://www.ixigua.com/'}, 71 | params={'albumId': albumId}).json() 72 | assert data['code'] == 200, "can't fetch playlist!" 73 | 74 | ep_ids = [b for a, b in sorted((ep['seq'], ep['episodeId']) 75 | for ep in data['data']['playlist'])] 76 | self.set_index(episodeId, ep_ids) 77 | for ep_id in ep_ids: 78 | yield 'https://www.ixigua.com/{albumId}?id={ep_id}'.format(**vars()) 79 | 80 | site = IXiGua() 81 | -------------------------------------------------------------------------------- /ykdl/extractors/joy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | 6 | class Joy(Extractor): 7 | 8 | name = '激动网 (Joy)' 9 | 10 | def prepare_mid(self): 11 | return match1(self.url, 'resourceId=([0-9]+)') 12 | 13 | def prepare(self): 14 | info = MediaInfo(self.name) 15 | 16 | data= get_response('https://api.joy.cn/v1/video', 17 | params={'id': self.mid}).json() 18 | assert data['code'] > 0, data['message'] 19 | data = data['data'] 20 | 21 | info.title = data['title'] 22 | url = data['res_url'] 23 | _, ext, _ = url_info(url) 24 | 25 | 
class KankanNews(Extractor):
    """Extractor for kankanews.com video pages."""

    name = '看看新闻 (kankannews)'

    def prepare(self):
        """Scrape the page for its ``omsid`` and resolve it to a video URL.

        The API request is signed with a double MD5 over the url-encoded
        parameters followed by a fixed suffix.

        Raises:
            AssertionError: no ``omsid`` on the page, or the API reply code
                is not ``'10000'``.
        """
        info = MediaInfo(self.name)

        page = get_content(self.url)
        omsid = match1(page, 'omsid="(\d+)"')
        assert omsid, 'No omsid has been found!!'

        info.artist = match1(page, 'keyboard:"(.+?)"')
        headline = match1(page, '视频(.+?)_[^_]+_看看新闻')
        info.title = info.artist + headline

        # Kept as an ordered list of pairs: the signature is computed over
        # the encoded string, so the parameter order matters.
        query = [
            ('nonce', get_random_str(8).lower()),
            ('omsid', omsid),
            ('platform', 'pc'),
            ('timestamp', int(time.time())),
            ('version', '1.0')
        ]
        salted = urlencode(query) + '&28c8edde3d61a0411511d3b1866f0636'
        query.append(('sign', hash.md5(hash.md5(salted))))

        reply = get_response(
            'https://api-app.kankanews.com/kankan/pc/getvideo',
            params=query).json()
        assert reply['code'] == '10000', reply['error']['message']
        video = reply['result']['video']

        info.streams['current'] = {
            'container': 'mp4',
            'profile': 'current',
            'src': [video['videourl']],
            'size': int(video['filesize'])
        }
        return info
list_only(self): 15 | return match(self.url, 'https://www.ku6.com/detail/\d+') 16 | 17 | def prepare_list(self): 18 | html = get_content(self.url) 19 | videos = matchall(html, "'title': '(.+?)',[\s\S]+?'playUrl': '(.+?)',") 20 | videos.reverse() 21 | self.set_index(None, videos) 22 | for title, url in videos: 23 | info = MediaInfo(self.name) 24 | info.title = title 25 | info.streams['current'] = { 26 | 'container': 'mp4', 27 | 'src': [url] 28 | } 29 | yield info 30 | 31 | site = Ku6() 32 | -------------------------------------------------------------------------------- /ykdl/extractors/kuwo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | 6 | class Kuwo(Extractor): 7 | name = 'KuWo (酷我音乐)' 8 | 9 | def prepare_mid(self): 10 | return match1(self.url, '/play_detail/(\d+)') 11 | 12 | def prepare(self): 13 | info = MediaInfo(self.name) 14 | install_cookie() 15 | 16 | if not self.is_list: 17 | resp = get_response('https://www.kuwo.cn/favicon.ico?v=1') 18 | kw_token = get_cookie('www.kuwo.cn', '/', 'kw_token').value 19 | params = { 20 | 'mid': self.mid, 21 | 'httpsStatus': 1, 22 | 'reqId': get_random_uuid() 23 | } 24 | data = get_response('https://www.kuwo.cn/api/www/music/musicInfo', 25 | headers={'csrf': kw_token}, 26 | params=params).json() 27 | assert data.get('code') == 200, data['message'] 28 | data = data['data'] 29 | 30 | pay = data['isListenFee'] 31 | if pay: 32 | if self.is_list: # just skip pay when extract from list 33 | self.logger.warning('Skip pay song: %s', self.mid) 34 | return 35 | raise AssertionError('Pay song: %s' % self.mid) 36 | 37 | albumpic = data['albumpic'] 38 | album = data['album'] 39 | title = data['name'] 40 | info.title = album in title and title or '{title} - {album}'.format(**vars()) 41 | info.artist = data['artist'] 42 | info.album = data['album'] 43 | info.duration = data['duration'] 44 | info.add_comment(data['albuminfo']) 45 | 46 | 
params['type'] = 'music' 47 | data = get_response('https://www.kuwo.cn/api/v1/www/music/playUrl', 48 | params=params).json() 49 | assert data.get('code') == 200, data['message'] 50 | 51 | url = data['data']['url'] 52 | info.streams['current'] = { 53 | 'container': 'mp3', 54 | 'profile': 'current', 55 | 'src': [(albumpic, url)] 56 | } 57 | return info 58 | 59 | def list_only(self): 60 | return 'playlist_detail' in self.url 61 | 62 | def prepare_list(self): 63 | install_cookie() 64 | html = get_content(self.url) 65 | return matchall(html, 'href="/play_detail/(\d+)"') 66 | 67 | site = Kuwo() 68 | -------------------------------------------------------------------------------- /ykdl/extractors/laifeng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | import datetime 6 | 7 | 8 | class Laifeng(Extractor): 9 | name = 'laifeng (来疯直播)' 10 | 11 | def prepare(self): 12 | info = MediaInfo(self.name, True) 13 | 14 | html = get_content(self.url) 15 | info.artist = match1(html, 'anchorName:\s*\'([^\']+)', 16 | '"anchorName":\s*"([^"]+)"') 17 | info.title = info.artist + '的直播房间' 18 | 19 | Alias = match1(html, 'initAlias:\'([^\']+)' ,'"ln":\s*"([^"]+)"') 20 | Token = match1(html, 'initToken: \'([^\']+)', '"tk":\s*"([^"]+)"') 21 | ts = datetime.datetime.utcnow().isoformat().split('.')[0] + 'Z' 22 | data = get_response('http://lapi.lcloud.laifeng.com/Play', 23 | params={ 24 | 'AppId': 101, 25 | 'StreamName': Alias, 26 | 'Action': 'Schedule', 27 | 'Token': Token, 28 | 'Version': 2.0, 29 | 'CallerVersion': 3.3, 30 | 'Caller': 'flash', 31 | 'Format': 'HttpFlv', 32 | 'Timestamp': ts, 33 | 'rd': random.randint(10000, 99999), 34 | }).json() 35 | assert data['Code'] == 'Success', data['Message'] 36 | 37 | stream_url = data['HttpFlv'][0]['Url'] 38 | info.streams['current'] = { 39 | 'container': 'flv', 40 | 'profile': 'current', 41 | 'src' : [stream_url], 42 | 'size': Infinity 43 | } 44 | 
def calcTimeKey(t):
    """Return the ``tkey`` request value derived from integer timestamp *t*.

    The low 32 bits of *t* are rotated right by ``magic % 17`` bits (as a
    32-bit word) and the result is XOR-ed with ``magic``.
    """
    magic = 185025305
    mask = 2**32 - 1
    shift = (magic % 17) % 32
    # 32-bit rotate right: the bits shifted out at the bottom re-enter at
    # the top.
    rotated = ((t & mask) >> shift) | ((t << (32 - shift)) & mask)
    return rotated ^ magic


def decode_m3u8(data):
    """Decode an obfuscated playlist payload.

    When the first five bytes of *data* equal ``vc_01`` (case-insensitive),
    the remainder is split into 4-bit nibbles, the nibble list is rotated so
    that its last 11 entries come first, and the nibbles are packed back
    into bytes. Any other input is returned unchanged.

    Returns:
        A ``bytearray`` with the decoded payload, or *data* itself.
    """
    if data[0:5].lower() != b'vc_01':
        # Not encoded: hand the content back untouched.
        return data

    nibbles = []
    for byte in bytearray(data[5:]):
        nibbles.append(byte >> 4)
        nibbles.append(byte & 15)

    # For payloads shorter than 11 nibbles the pivot is negative and the
    # slices leave the list unchanged, exactly as slicing did before.
    pivot = len(nibbles) - 11
    rotated = nibbles[pivot:] + nibbles[:pivot]

    return bytearray((high << 4) + low
                     for high, low in zip(rotated[0::2], rotated[1::2]))
'www.le.com', 67 | 'region': 'cn', 68 | 'source': 1000, 69 | 'accessyx': 1 70 | }, 71 | headers=headers).json()['msgs']['playurl'] 72 | 73 | info.title = data['title'] 74 | info.duration = data['duration'] 75 | for stream, sdp in data['dispatch'].items(): 76 | s_url = data['domain'][0] + sdp[0] 77 | data2 = get_response(s_url, 78 | params={ 79 | 'm3v': 1, 80 | 'termid': 1, 81 | 'format': 1, 82 | 'hwtype': 'un', 83 | 'ostype': 'MacOS10.12.4', 84 | 'p1': 1, 85 | 'p2': 10, 86 | 'p3': '-', 87 | 'expect': '3', 88 | 'tn': random.random(), 89 | 'vid': self.mid, 90 | 'uuid': hash.sha1(s_url) + '_0', 91 | 'tss': 'ios' 92 | }, 93 | headers=headers).json() 94 | 95 | # hold on ! more things to do 96 | # to decode m3u8 (encoded) 97 | m3u8 = get_content(data2['location'], 98 | params={ 99 | 'r': int(time.time() * 1000), 100 | 'appid': 500 101 | }, 102 | headers=headers, encoding=decode_m3u8) 103 | stream_id, stream_profile = self.stream_2_id_profile[stream] 104 | info.streams[stream_id] = { 105 | 'container': 'm3u8', 106 | 'profile': stream_profile 107 | } 108 | stream_temp[stream] = NamedTemporaryFile(mode='w+b', suffix='.m3u8') 109 | stream_temp[stream].write(m3u8) 110 | info.streams[stream_id]['src'] = [stream_temp[stream].name] 111 | stream_temp[stream].flush() 112 | 113 | return info 114 | 115 | def list_only(self): 116 | return bool(match1(self.url, '/tv/\d+.html')) 117 | 118 | def prepare_list(self): 119 | if self.list_only(): 120 | mid = None 121 | else: 122 | mid = self.mid 123 | html = get_content(self.url) 124 | pid = match1(html, r'\bpid: ?(\d+)') 125 | if pid is None: 126 | return 127 | self.url = 'https://www.le.com/tv/{pid}.html'.format(**vars()) 128 | 129 | html = get_content(self.url) 130 | vids = matchall(html, '/vplay/(\d+).html"') 131 | mids = [] 132 | for vid in vids: 133 | if vid in mids: 134 | continue 135 | mids.append(vid) 136 | self.set_index(mid, mids) 137 | return mids 138 | 139 | site = Letv() 140 | 
class Lizhi(Extractor):
    """Extractor for Lizhi FM audio pages and user listings."""

    name = 'Lizhi FM (荔枝电台)'

    def prepare_mid(self):
        """Return nothing; ``prepare`` assigns ``self.mid`` from the page."""
        pass

    def prepare(self):
        """Read the hidden id, artist and title from the page, then resolve
        the audio URL through the ``hidden_ph`` endpoint.

        Raises:
            AssertionError: the endpoint answers with a non-zero ``rcode``.
        """
        info = MediaInfo(self.name)

        page = get_content(self.url)
        # Four capture groups: hidden id, user name, radio name (unused)
        # and title.
        pattern = ('data-hidden-ph\s?=\s?"(.+?)" '
                   'data-user-name\s?=\s?"(.+?)" '
                   'data-radio-name\s?=\s?"(.+?)" '
                   'data-title\s?=\s?"(.+?)"')
        self.mid, info.artist, _, info.title = matchm(page, pattern)

        reply = get_response(
            'https://www.lizhi.fm/hidden_ph/{self.mid}'.format(self=self)
        ).json()
        assert reply['rcode'] == 0, reply['msg']

        info.streams['current'] = {
            'container': 'mp3',
            'profile': 'current',
            'src': [reply['data']['url']]
        }
        return info

    def list_only(self):
        """Return True when the URL contains 'user'."""
        return 'user' in self.url

    def prepare_list(self):
        """Return the audio page URLs found on a user page, in reverse of
        their order on the page."""
        page = get_content(self.url)
        fm = match1(page, 'class="user-info-name">FM(\d+)')
        paths = matchall(page, 'href="(/{fm}/\d+)"'.format(fm=fm))
        return ['https://www.lizhi.fm' + path for path in paths[::-1]]
# Character substitutions applied to the base64 alphabet by the tk2 codec.
encode_translation = bytes.maketrans(b'+/=', b'_~-')
decode_translation = bytes.maketrans(b'_~-', b'+/=')


def encode_tk2(s):
    """Encode text *s* as a tk2 token.

    The UTF-8 bytes are base64-encoded, ``+ / =`` are replaced by ``_ ~ -``,
    and the result is reversed.
    """
    substituted = base64.b64encode(s.encode()).translate(encode_translation)
    return substituted[::-1].decode()


def decode_tk2(s):
    """Invert ``encode_tk2``; *s* may be ``str`` or ``bytes``."""
    raw = s if isinstance(s, bytes) else s.encode()
    restored = raw[::-1].translate(decode_translation)
    return base64.b64decode(restored).decode()


def generate_tk2(did):
    """Return the tk2 token for device id *did* at the current time."""
    fields = 'did={}|pno=1030|ver=0.3.0301|clit={}'.format(
        did, int(time.time()))
    return encode_tk2(fields)
44 | js_ctx = JSEngine() 45 | js = match1(html, '<script>window.__NUXT__=(.+);</script>') 46 | data = js_ctx.eval(js) 47 | mid = match1(data, "PartId': '(\d+)'") 48 | else: 49 | mid = match1(html, 50 | 'window.location = "/b/\d+/(\d+).html"', 51 | r'routePath:"\\u002Fl\\u002F\d+\\u002F(\d+).html"', 52 | 'vid[=:]\D?(\d+)') 53 | return mid 54 | 55 | def prepare(self): 56 | info = MediaInfo(self.name) 57 | info.extra.referer = self.url 58 | install_cookie() 59 | 60 | did = get_random_uuid() 61 | tk2 = generate_tk2(did) 62 | params = { 63 | 'tk2': tk2, 64 | 'video_id': self.mid, 65 | 'type': 'pch5' 66 | } 67 | data = get_response('https://pcweb.api.mgtv.com/player/video', 68 | params=params).json() 69 | assert data['code'] == 200, ('[failed] code: {}, msg: {}' 70 | .format(data['code'], data['msg'])) 71 | assert data['data'], '[Failed] Video info not found.' 72 | data = data['data'] 73 | 74 | info.title = data['info']['title'] + ' ' + data['info']['desc'] 75 | 76 | params['pm2'] = data['atc']['pm2'] 77 | data = get_response('https://pcweb.api.mgtv.com/player/getSource', 78 | params=params).json() 79 | assert data['code'] == 200, ('[failed] code: {}, msg: {}' 80 | .format(data['code'], data['msg'])) 81 | assert data['data'], '[Failed] Video source not found.' 
api_info1 = 'https://n.miaopai.com/api/aj_media/info.json?smid={}&appid=530&_cb={}'
api_info2 = 'http://api.miaopai.com/m/v2_channel.json?fillType=259&scid={}&vend='
api_stream = 'http://gslb.miaopai.com/stream/{}.json?vend='

class Miaopai(Extractor):
    """Extractor for Miaopai videos (marked BROKEN in the original file)."""

    name = '秒拍 (Miaopai)'

    def prepare_mid(self):
        """Return the media id from the URL, else from the page source."""
        mid = match1(self.url, r'/media/([^\./]+)')
        if mid is None:
            html = get_content(self.url)
            mid = match1(html, 's[cm]id ?= ?[\'"]([^\'"]+)[\'"]')
        return mid

    def prepare(self):
        """Resolve ``self.mid`` to one 'current' stream.

        Ids longer than 24 characters use the ``aj_media`` endpoint. Shorter
        ids use the channel endpoint and, if that fails for any reason, the
        ``gslb`` stream endpoint. The title falls back to the page's
        description meta tag.

        Raises:
            AssertionError: an endpoint reports a non-success status.
        """
        info = MediaInfo(self.name)
        title = None

        if 'show' in self.url:
            new_url = get_location(self.url)
            if new_url != self.url:
                self.logger.debug('redirect to' + new_url)
                self.url = new_url

        if len(self.mid) > 24:
            add_header('Referer', self.url)
            cb = '_jsonp{}'.format(get_random_str(10).lower())
            # Fix: ``json_html`` was read without ever being assigned, which
            # raised NameError. Fetch the reply as text and parse the span
            # between the first '{' and the last '}', which covers both a
            # callback-wrapped and a plain JSON body.
            json_html = get_content(api_info1.format(self.mid, cb))
            data = json.loads(
                json_html[json_html.find('{'):json_html.rfind('}') + 1])
            assert data['code'] == 200, data['msg']

            data = data['data']
            title = data['description']
            url = data['meta_data'][0]['play_urls']['m']
            _, ext, _ = url_info(url)

        else:
            try:
                data = get_response(api_info2.format(self.mid)).json()
                assert data['status'] == 200, data['msg']

                data = data['result']
                title = data['ext']['t']
                scid = data['scid'] or self.mid
                ext = data['stream']['and']
                base = data['stream']['base']
                vend = data['stream']['vend']
                url = '{base}{scid}.{ext}?vend={vend}'.format(**vars())
            except Exception:
                # fallback; ``Exception`` rather than a bare ``except`` so
                # that KeyboardInterrupt / SystemExit still propagate
                data = get_response(api_stream.format(self.mid)).json()
                assert data['status'] == 200, data['msg']

                data = data['result'][0]
                ext = None
                scheme = data['scheme']
                host = data['host']
                path = data['path']
                sign = data['sign']
                url = '{scheme}{host}{path}{sign}'.format(**vars())

        if not title:
            html = get_content(self.url)
            title = match1(html, '<meta name="description" content="([^"]+)">')
        info.title = title

        info.streams['current'] = {
            'container': ext or 'mp4',
            'profile': 'current',
            'src': [url]
        }
        return info

    def prepare_list(self):
        """Return the ids quoted inside the page's ``video_list=[...]``."""
        html = get_content(self.url)
        video_list = match1(html, r'video_list=\[([^\]]+)')
        return matchall(video_list, '"([^",]+)')
class NeteaseLive(Extractor):
    """Extractor for live.163.com rooms."""

    name = '网易直播 (163)'

    def prepare_mid(self):
        """Return the numeric room id found after ``room/`` in the URL."""
        return match1(self.url, r'room/(\d+)')

    def prepare(self):
        """Fetch the room JSON and return a MediaInfo with one stream.

        Raises:
            AssertionError: the reply carries no ``liveVideoUrl``.
        """
        info = MediaInfo(self.name, True)

        data = get_response(
                'https://data.live.126.net/liveAll/{self.mid}.json'.format(**vars()),
                params={'tt': int(time.time() * 1000)}
            ).json()
        assert 'liveVideoUrl' in data, 'live video is offline'

        info.title = data['roomName']
        try:
            info.artist = data['sourceinfo']['tname']
        except KeyError:
            # The artist is optional in the reply.
            pass
        info.duration = duration = data.get('duration')
        # Fix: this used to assign to ``info.add_comment``, replacing the
        # method instead of recording the comment (other extractors call it).
        # NOTE(review): 'channal' is the key spelling the original code
        # reads; presumably it mirrors the API - confirm.
        info.add_comment(data['channal']['name'])

        url = data['liveVideoUrl']
        stream = {
            'container': url.split('.')[-1],
            'profile': 'current',
            'src': [url],
        }
        # Only streams without a duration get an infinite size. The former
        # ``not duration and 'size'`` key expression also inserted a stray
        # ``False`` key whenever a duration was present.
        if not duration:
            stream['size'] = Infinity
        info.streams['current'] = stream
        return info
'https://cc.163.com/'}) 31 | data = match1(html, '<script id="__NEXT_DATA__".*?>(.*?)</script>') 32 | #self.logger.debug('data:\n%s', data) # too long 33 | data = json.loads(data) 34 | 35 | def get_live_info(vbr=0): 36 | params = vbr and {'vbr': vbr} or None 37 | data = get_response('http://cgi.v.cc.163.com/video_play_url/{self.mid}' 38 | .format(**vars()), params=params).json() 39 | 40 | stream_profile = data['vbrname_mapping'][data['pc_vbr_sel']] 41 | stream_id = self.profile_2_id[stream_profile] 42 | info.streams[stream_id] = { 43 | 'container': 'flv', 44 | 'profile': stream_profile, 45 | 'src' : [data['videourl']], 46 | 'size': Infinity 47 | } 48 | 49 | if vbr == 0: 50 | vbr_sel = data['vbr_sel'] 51 | for vbr in data['vbr_list']: 52 | if vbr != vbr_sel: 53 | get_live_info(vbr) 54 | 55 | try: 56 | # project, select first living room 57 | data = data['props']['pageProps']['data'] 58 | rooms = data['module_infos'][0]['content'] 59 | 60 | except KeyError: 61 | data = data['props']['pageProps']['roomInfoInitData'] 62 | assert 'micfirst' in data, 'unsupported live!' 
63 | 64 | info.title = data['live']['title'] 65 | info.artist = data['micfirst']['nickname'] 66 | 67 | try: 68 | streams = data['live']['quickplay']['resolution'] 69 | except KeyError: 70 | get_live_info() 71 | else: 72 | for quality, stream in streams.items(): 73 | stream_profile = self.quality_2_profile[quality] 74 | stream_id = self.profile_2_id[stream_profile] 75 | cdn = stream['cdn'] 76 | cdn.pop('wy', None) # UDP 77 | url = random.choice(list(cdn.values())) 78 | info.streams[stream_id] = { 79 | 'container': 'flv', 80 | 'profile': stream_profile, 81 | 'src' : [url], 82 | 'size': Infinity 83 | } 84 | 85 | else: 86 | for room in rooms: 87 | if room['is_living']: 88 | self.mid = room['ccid'] 89 | info.artist = room['name'] 90 | break 91 | info.title = data['share_title'] 92 | get_live_info() 93 | 94 | return info 95 | 96 | site = NeteaseLive() 97 | -------------------------------------------------------------------------------- /ykdl/extractors/netease/m3g.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .._common import * 4 | 5 | 6 | class m3g(SimpleExtractor): 7 | name = '网易手机网 (163 3g)' 8 | 9 | def init(self): 10 | self.url_patterns = ['"contentUrl":"([^"]+)"', '<video\s+data-src="([^"]+)"'] 11 | self.title_pattern = 'class="title">(.+?)</' 12 | 13 | def get_url(self): 14 | if self.url_patterns: 15 | v_url = [] 16 | for url in matchall(self.html, *self.url_patterns): 17 | if url[:2] == '//': 18 | url = 'http:' + url 19 | if url not in v_url: 20 | v_url.append(url) 21 | self.v_url = v_url 22 | 23 | site = m3g() 24 | -------------------------------------------------------------------------------- /ykdl/extractors/netease/music/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def get_extractor(url): 5 | add_header('Referer', 'http://music.163.com/') 6 | 7 | if '/program' in url: 8 | from . 
class NeteaseMusic(NeteaseMusicBase):
    """Extractor for single songs and song collections on music.163.com."""

    name = 'Netease Music (网易云音乐)'
    api_url = 'http://music.163.com/api/song/detail/'

    def get_music(self, data):
        """Return the first song entry of a song-detail API reply."""
        songs = data['songs']
        return songs[0]

    def prepare_list(self):
        """Return the song ids of the album / playlist / artist in the URL.

        The kind of collection is chosen by substring checks on the page
        URL.  The id list is also registered through ``set_index``.
        """
        query = {
            'id': self.mid,
            'csrf_token': ''
        }
        page_url = self.url
        if 'album' in page_url:
            reply = get_response('http://music.163.com/api/album/' + self.mid,
                                 params=query).json()
            tracks = reply['album']['songs']
        elif 'playlist' in page_url or 'toplist' in page_url:
            reply = get_response('http://music.163.com/api/playlist/detail',
                                 params=query).json()
            tracks = reply['result']['tracks']
        elif 'artist' in page_url:
            reply = get_response('http://music.163.com/api/artist/' + self.mid,
                                 params=query).json()
            tracks = reply['hotSongs']

        song_ids = []
        for track in tracks:
            song_ids.append(track['id'])
        self.set_index(self.mid, song_ids)
        return song_ids
def RSA_string(input_str):
    """Return ``input_str`` raised to a fixed public exponent, as hex.

    The characters are packed into one integer in little-endian order
    (the first character is the least significant byte), the integer is
    raised to ``public_exponent`` modulo ``modulus``, and the result is
    returned as lowercase hex, left-padded with ``'0'`` to 256 digits.
    No padding scheme is applied to the message here.
    """
    modulus = 157794750267131502212476817800345498121872783333389747424011531025366277535262539913701806290766479189477533597854989606803194253978660329941980786072432806427833685472618792592200595694346872951301770580765135349259590167490536138082469680638514416594216629258349130257685001248172188325316586707301643237607
    public_exponent = 65537

    # Little-endian packing: the character at position p contributes
    # ord(ch) * 256**p.
    packed = sum(ord(ch) * 256 ** pos for pos, ch in enumerate(input_str))

    cipher = pow(packed, public_exponent, modulus)
    return format(cipher, 'x').zfill(256)
class NeteaseMv(Extractor):
    """Extractor for music videos (MV) on music.163.com."""

    name = 'Netease MV (网易音乐 MV)'

    # Maps a resolution key of the API's 'brs' dict to [stream id, profile].
    resolution_2_id_profile = {
        '1080': ['BD', '1080P'],
        '720': ['TD', '超清'],
        '480': ['HD', '高清'],
        '240': ['SD', '标清']
    }

    def prepare_mid(self):
        """Extract the MV id from the URL (``?id=...`` or ``mv/<digits>``)."""
        return match1(self.url, r'\?id=(.*)', r'mv/(\d+)')

    def prepare(self):
        """Query the MV detail API and build the MediaInfo.

        Returns:
            MediaInfo with title, artist and one mp4 stream for every
            known resolution present in the reply's ``brs`` mapping.
        """
        info = MediaInfo(self.name)

        data = get_response('http://music.163.com/api/mv/detail/',
                            params={
                                'id': self.mid,
                                'ids': self.mid,
                                'csrf_token': ''
                            }).json()['data']

        info.title = data['name']
        info.artist = data['artistName']
        brs = data['brs']
        for resolution, (stream_id, stream_profile) in \
                self.resolution_2_id_profile.items():
            if resolution in brs:
                # Index with the loop variable; the previous code used the
                # builtin `id` here, which raised KeyError.
                info.streams[stream_id] = {
                    'container': 'mp4',
                    'profile': stream_profile,
                    'src': [brs[resolution]]
                }

        return info
class NeteaseDj(NeteaseMusicBase):
    """Extractor for DJ radio programs on music.163.com."""

    name = 'Netease Dj (网易电台)'
    api_url = 'http://music.163.com/api/dj/program/detail/'

    def get_music(self, data):
        """Return the main song entry of a program-detail API reply."""
        program = data['program']
        return program['mainSong']

    def prepare_list(self):
        """Return the program ids of a DJ radio, or None for other URLs.

        Only URLs containing ``djradio`` are treated as lists.  The ids
        are also registered through ``set_index``.
        """
        if 'djradio' not in self.url:
            return None

        query = {
            'radioId': self.mid,
            'ids': self.mid,
            'csrf_token': '',
        }
        reply = get_response('http://music.163.com/api/dj/program/byradio/',
                             params=query).json()
        program_ids = [program['id'] for program in reply['programs']]
        self.set_index(self.mid, program_ids)
        return program_ids
7 | 8 | class OpenC(Extractor): 9 | name = '网易公开课 (163 openCourse)' 10 | 11 | sopported_stream_types = [ 12 | ['TD', 'Shd', '超清'], 13 | ['HD', 'Hd', '高清'], 14 | ['SD', 'Sd', '标清'] 15 | ] 16 | name2lang = { 17 | '中文': 'zh', 18 | '英文': 'en' 19 | } 20 | 21 | def list_only(self): 22 | return self.mid[1] is None 23 | 24 | @staticmethod 25 | def format_mid(mid): 26 | if not isinstance(mid, tuple): 27 | mid = mid, None 28 | mid = mid[:2] 29 | assert len(mid) == 2 and mid[0] 30 | return mid 31 | 32 | def prepare_mid(self): 33 | return match1(self.url, r'\bpid=(\w+)'), match1(self.url, r'\bmid=(\w+)') 34 | 35 | @functools.cache 36 | def parse_html(self, url): 37 | html = get_content(url) 38 | js = match1(html, 'window\.__NUXT__=(.+);</script>') 39 | data = JSEngine().eval(js) 40 | self.logger.debug('data: \n%s', data) 41 | return data 42 | 43 | def prepare_data(self): 44 | url = 'https://open.163.com/newview/movie/free?pid={}'.format(self.mid[0]) 45 | data = self.parse_html(url) 46 | try: 47 | self.url = data['data'][0]['playUrl'] 48 | except KeyError: 49 | return data 50 | else: 51 | self.mid = None 52 | return self.prepare_data() 53 | 54 | def prepare(self): 55 | info = MediaInfo(self.name) 56 | 57 | data = self.prepare_data() 58 | moiveList = data['state']['movie']['moiveList'] 59 | if not moiveList: 60 | return 61 | 62 | mid = self.mid[1] 63 | for movie in moiveList: 64 | if movie['mid'] == mid: 65 | break 66 | assert movie['mid'] == mid, "can't found mid %r" % mid 67 | 68 | title = data['data'][0]['title'] 69 | mtitle = movie['title'].rpartition(title)[-1] 70 | if mtitle: 71 | for sp in [':', '】']: 72 | t1, _, t2 = mtitle.partition(sp) 73 | if title.startswith(t1): 74 | mtitle = t2 75 | break 76 | if mtitle: 77 | p = movie['pNumber'] 78 | pc = len(moiveList) 79 | if pc > 1 and not mtitle[0].isdecimal() and str(p) not in mtitle: 80 | pl = 0 81 | while pc: 82 | pl += 1 83 | pc //= 10 84 | mtitle = ('{:0>%dd} {}' % pl).format(p, mtitle) 85 | title = '{title} - 
{mtitle}'.format(**vars()) 86 | school_info = data['data'][0] 87 | school = school_info['school'] 88 | director = school_info['director'] 89 | if director and director != 'null': 90 | if director != school : 91 | director = '[{school}] {director}'.format(**vars()) 92 | else: 93 | director = school 94 | if school not in title: 95 | title = '[{school}] {title}'.format(**vars()) 96 | info.title = title 97 | info.artist = director 98 | 99 | for stream_id, tp, stream_profile in self.sopported_stream_types: 100 | for ext in ['mp4', 'm3u8']: 101 | for orig in ['', 'Orign']: 102 | if stream_id in info.streams: 103 | continue 104 | url = movie['{ext}{tp}Url{orig}'.format(**vars())] 105 | if not url: 106 | continue 107 | size = movie['{ext}{tp}Size{orig}'.format(**vars())] 108 | info.streams[stream_id] = { 109 | 'container': ext, 110 | 'profile': stream_profile, 111 | 'src' : [url], 112 | 'size': size 113 | } 114 | 115 | nlang = 0 116 | for sub in movie['subList']: 117 | name = sub['subName'] 118 | if not name: 119 | if nlang: 120 | name = movie['subtitle'] 121 | else: 122 | name = '中文' 123 | nlang += 1 124 | lang = self.name2lang[name] 125 | info.subtitles.append({ 126 | 'lang': lang, 127 | 'name': name, 128 | 'format': 'srt', 129 | 'src' : sub['subUrl'], 130 | 'size': sub['subSize'] 131 | }) 132 | 133 | return info 134 | 135 | def prepare_list(self): 136 | data = self.prepare_data() 137 | pid, mid = self.mid 138 | mids = [movie['mid'] for movie in data['state']['movie']['moiveList']] 139 | self.set_index(mid, mids) 140 | for mid in mids: 141 | yield pid, mid 142 | 143 | site = OpenC() 144 | -------------------------------------------------------------------------------- /ykdl/extractors/netease/video.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .._common import * 4 | 5 | 6 | class NeteaseVideo(Extractor): 7 | name = '网易视频 (163)' 8 | 9 | def prepare_mid(self): 10 | return match1(self.url, 
class PPS(Extractor):
    """Extractor for Qixiu (pps.tv) live rooms."""

    name = '奇秀(Qixiu)'

    # Maps the API bit-rate name to [stream id, profile].
    rate_2_id_profile = {
        'source': ['TD', '超清'],
        'high': ['HD', '高清'],
        'smooth': ['SD', '标清']
    }

    def prepare_mid(self):
        """Return the anchor's user id scraped from the room page."""
        html = get_content(self.url)
        return match1(html, '"user_id":"([^"]+)",')

    def prepare(self):
        """Build the live MediaInfo for the room.

        Title and artist are scraped from the page; streams are fetched
        per bit rate through ``getlive``.  API error messages are only
        logged at debug level, they do not abort the extraction.
        """
        info = MediaInfo(self.name, True)

        html = get_content(self.url)
        title = json.loads(match1(html, '"room_name":("[^"]*"),'))
        artist = json.loads(match1(html, '"nick_name":("[^"]+"),'))
        info.title = '{title} - {artist}'.format(**vars())
        info.artist = artist

        def get_live_info(rate='source'):
            """Add the stream for `rate`; return an error message or None.

            The 'source' call also walks the remaining rates listed in
            the reply and joins their error messages.
            """
            data = getlive(self.mid, rate)
            if data['code'] != 'A00000':
                return data.get('msg')

            data = data['data']
            url = data.get('https_flv') or data.get('flv') or data.get('rtmp')
            if url:
                url = url.replace('rtmp://', 'http://')
                # randrange() needs an int: the float 1e4 raises TypeError
                # on Python 3.12+.
                ran = random.randrange(10000)
                sep = '&' if '?' in url else '?'
                url = '{}{}ran={}'.format(url, sep, ran)
                stream_id, stream_profile = self.rate_2_id_profile[rate]
                info.streams[stream_id] = {
                    'container': 'flv',
                    'profile': stream_profile,
                    'src' : [url],
                    'size': Infinity
                }

            error_msges = []
            if rate == 'source':
                rate_list = data['rate_list']
                if 'source' in rate_list:
                    rate_list.remove('source')
                for other_rate in rate_list:
                    error_msg = get_live_info(other_rate)
                    if error_msg:
                        error_msges.append(error_msg)
            if error_msges:
                return ', '.join(error_msges)

        error_msg = get_live_info()
        if error_msg:
            self.logger.debug('error_msg:\n\t' + error_msg)

        return info
def get_extractor(url):
    """Pick the QQ extractor for `url` and return ``(site, url)``.

    ``live.qq`` URLs use the live extractor, ``egame.qq`` URLs the egame
    extractor, everything else the video extractor.  The submodule is
    imported lazily, only for the branch that is taken.
    """
    if 'live.qq' in url:
        from .live import site
    elif 'egame.qq' in url:
        from .egame import site
    else:
        from .video import site

    return site, url
class QQLive(Extractor):
    """Extractor for QQ Live rooms, backed by the qie.tv room API."""

    name = 'QQ Live (企鹅直播)'

    def prepare_mid(self):
        """Return the room id from the URL, falling back to the page HTML."""
        # Raw strings: `\d` in a plain literal is an invalid escape sequence.
        mid = match1(self.url, r'/(\d+)')
        if mid is None:
            html = get_content(self.url)
            mid = match1(html, r'"room_id":(\d+)')
        return mid

    def prepare(self):
        """Query the room API and build the live MediaInfo.

        Raises:
            AssertionError: when the API reports an error or the room is
                not currently live.
        """
        info = MediaInfo(self.name, True)

        #from upstream!!
        data = get_response(
                'http://www.qie.tv/api/v1/room/{self.mid}'.format(**vars())).json()
        assert data['error'] == 0, '{error}: {data}'.format(**data)

        livedata = data['data']
        assert livedata['show_status'] == '1', 'live is offline!!'

        info.title = livedata['room_name']
        info.artist = livedata['nickname']

        info.streams['current'] = {
            'container': 'flv',
            'profile': 'current',
            'src' : ['{rtmp_url}/{rtmp_live}'.format(**livedata)],
            'size': Infinity
        }
        return info
class Embed(Extractor):
    """Extractor for embedded Sina videos addressed by a numeric vid."""

    name = '新浪视频 (sina)'

    def prepare(self):
        """Build a single-stream MediaInfo from the vid found in the URL.

        Raises:
            AssertionError: when no vid can be found in the URL.
        """
        info = MediaInfo(self.name)

        # Raw strings: `\d` / `\.` in a plain literal are invalid escapes.
        vid = match1(self.url, r'/(\d+)\.mp4', r'vid=(\d+)')
        # Fail early instead of building a request URL with `vid=None`.
        assert vid, "can't find vid!"
        url = 'https://ask.ivideo.sina.com.cn/v_play_ipad.php?' + urlencode({'vid': vid})

        info.streams['current'] = {
            'container': 'mp4',
            'profile': 'current',
            'src': [url]
        }
        return info
def get_realurl(url, vid):
    """Resolve the playable URL for a Sina video file.

    Args:
        url: the file API endpoint.
        vid: the file id, appended as the ``vid`` query parameter.

    Returns:
        The request URL itself when the endpoint redirects (the redirect
        target expires, so the origin is kept); otherwise the second
        CDATA value found in the response body.
    """
    params = urlencode({'vid': vid})
    url = '{url}?{params}'.format(**vars())
    if get_location(url) != url:
        return url  # redirect url will be expired, keep origin
    html = get_content(url)
    # Raw string: `\[` / `\]` in a plain literal are invalid escapes.
    return matchall(html, r'CDATA\[([^\]]+)')[1]
MediaInfo(self.name) 27 | 28 | data = get_response('https://s.video.sina.com.cn/video/h5play', 29 | params={'video_id': self.mid}).json() 30 | assert data['code'] == 1, data['message'] 31 | data = data['data'] 32 | 33 | info.title = data['title'] 34 | info.duration = int(data['length']) // 1000 35 | 36 | for t in ['mp4', 'flv', 'hlv', '3gp']: 37 | video_info = data['videos'].get(t) 38 | if video_info: 39 | break 40 | 41 | for profile in video_info: 42 | v = video_info[profile] 43 | url = get_realurl(v['file_api'], v['file_id']) 44 | info.streams[profile] = { 45 | 'container': v['type'], 46 | 'profile': profile, 47 | 'src': [url] 48 | } 49 | 50 | return info 51 | 52 | def prepare_list(self): 53 | html = get_content(self.url) 54 | return matchall(html, 'video_id: ([^,]+)') 55 | 56 | site = Sina() 57 | -------------------------------------------------------------------------------- /ykdl/extractors/singlemultimedia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ._common import * 4 | 5 | 6 | # TODO: add more supported types and move to ykdl.util 7 | # REF: https://www.iana.org/assignments/media-types/media-types.xhtml 8 | 9 | contentTypes = { 10 | 'audio/basic': 'au', 11 | 'audio/mpeg': 'mp3', 12 | 'audio/x-aiff': 'aif', 13 | 'audio/x-pn-realaudio': 'ra', 14 | 'audio/x-wav': 'wav', 15 | 'video/3gpp': '3gp', 16 | 'video/3gpp2': '3p2', 17 | 'video/avi': 'avi', 18 | 'video/mp2t': 'ts', 19 | 'video/mp4': 'mp4', 20 | 'video/mpeg': 'mp2v', 21 | 'video/mpeg4': 'mp4', 22 | 'video/mpg': 'mpg', 23 | 'video/ogg': 'ogg', 24 | 'video/quicktime': 'mov', 25 | 'video/vnd.mpegurl': 'mxu', 26 | 'video/vnd.ms-playready.media.pyv': 'pyv', 27 | 'video/vnd.rn-realvideo': 'rv', 28 | 'video/vnd.uvvu.mp4': 'uvu', 29 | 'video/vnd.vivo': 'viv', 30 | 'video/webm': 'webm', 31 | 'video/x-f4v': 'f4v', 32 | 'video/x-fli': 'fli', 33 | 'video/x-flv': 'flv', 34 | 'video/x-ivf': 'IVF', 35 | 'video/x-sgi-movie': 'movie', 36 | 
# File extensions accepted as direct multimedia links.
extNames = {
    # video
    'm2ts', 'mts', 'm2t', 'ts', 'mkv', 'avi',         # contain
    'mpeg', 'mpg', 'm1v', 'mpv', 'dat',               # MPEG-1
    'mpeg2', 'mpg2', 'm2v', 'mpv2', 'mp2v', 'vob',    # MPEG-2
    'mpeg4', 'mpg4', 'm4v', 'mp4', 'mp4v',            # H.264/MPEG-4 AVC
    'flv', 'f4v',                     # Flash Video   # H.264/MPEG-4 AVC
    '3gpp', '3gp2', '3gp', '3g2',                     # H.264/MPEG-4 AVC
    'h264', 'x264', '264', 'avc',                     # H.264/MPEG-4 AVC
    'h265', 'x265', '265', 'hevc',                    # H.265/HEVC
    'webm',                                           # WebM
    'ogv',                                            # Ogg Media File
    'rm', 'rmvb',                                     # Real Video
    'mov', 'hdmov', 'qt',                             # QuickTime
    'asf', 'wmv', 'wm',                               # Windows Media Video
    # audio
    'mpa', 'mp1', 'm1a', 'mp2', 'm2a', 'mp3', 'm4a',
    'weba', 'f4a', 'ra', 'ogg', 'oga', 'wav', 'wma',
    # The comma after 'opus' was missing, which silently merged it with
    # 'jpeg' into the single entry 'opusjpeg'.
    'flac', 'ape', 'mka', 'dts', 'aac', 'ac3', 'opus',
    # picture
    'jpeg', 'jpe', 'jpg', 'jpc', 'jp2', 'j2k',
    'tiff', 'bmp', 'png', 'gif', 'jbg', 'webp',
    # HLS
    'm3u',
    *contentTypes.values()
}

class Multimedia(Extractor):
    """Extractor for URLs that point directly at a media file or playlist."""

    name = 'Multimedia (多媒体文件)'

    def prepare(self):
        """Probe the URL with a HEAD request and build the MediaInfo.

        The file type comes from the URL extension, falling back to the
        Content-Type header.  The title comes from the response's file
        name, falling back to the last URL path segment.

        Raises:
            AssertionError: when the link is not a known multimedia type.
        """
        # Was `get_head_response(url)`: `url` is not defined in this scope.
        resinfo = get_head_response(self.url).info()
        # Get file type
        ext = self.url.split('?')[0].split('.')[-1]
        if ext not in extNames:
            # Drop parameters such as "; charset=UTF-8" before the lookup.
            ctype = resinfo.get('Content-Type', '')
            ctype = ctype.split(';')[0].strip().lower()
            if ctype.startswith('image/'):
                ext = ctype[6:]
            else:
                ext = contentTypes.get(ctype)
            assert ext in extNames, 'This link is not a Multimedia file!'

        # Get title
        title = resinfo.get_filename()
        if title is None:
            title = self.url.split('?')[0].split('/')[-1]
        if title.endswith('.' + ext):
            title = title[0 : -len(ext) - 1]

        info = MediaInfo(self.name)
        info.title = title
        if ext[:3] == 'm3u':
            info.streams = load_m3u8_playlist(self.url)
        else:
            info.streams['current'] = {
                'container': ext,
                'profile': 'current',
                'src': [self.url],
                'size': int(resinfo.get('Content-Length', 0))
            }
        return info
class MySohu(SohuBase):
    '''Sohu extractor variant configured for the my.tv.sohu.com API.'''
    name = '搜狐自媒体 (MySohu)'

    # Endpoint and query template consumed by SohuBase.fetch_info(),
    # which fills in 'vid' before each request.
    apiurl = 'http://my.tv.sohu.com/play/videonew.do'
    apiparams = dict(vid='', referer='http://my.tv.sohu.com/')

site = MySohu()
def prepare_mid(self):
    '''Find the numeric media id of `self.url`.

    The URL itself is tried first; when it carries no id the page is
    downloaded and searched instead.
    Returns the id as a string, or None when no pattern matches.
    '''
    mid = match1(self.url, r'\d/(\d+)\.s?html',
                           r'\b[bv]?id=(\d+)',
                           r'share_play.html#(\d+)_')
    if mid is None:
        html = get_content(self.url)
        # Three independent patterns. The comma after the second one was
        # missing, which silently merged it with the third into one regex.
        mid = match1(html, r'\b[bv]id\s*[=:]\s*["\']?(\d+)',
                           r'(?:&|\x26)[bv]?id=(\d+)',
                           r'/(\d+)/v\.swf')
    return mid
class TvSohu(SohuBase):
    '''Sohu extractor variant configured for the hot.vrs.sohu.com API.'''
    name = '搜狐视频 (TvSohu)'

    apiurl = 'http://hot.vrs.sohu.com/vrs_flash.action'
    apiparams = {'vid': ''}

    def list_only(self):
        '''Return True when the URL is an album page (no single video).'''
        return bool(match(self.url, r'tv.sohu.com/s\d{4}/[a-z]'))

    def prepare_list(self):
        '''Return the video ids of the playlist the page belongs to.'''
        html = get_content(self.url)
        plid = match1(html, r'\bplaylistId\s*=\s*["\']?(\d+)')
        data = get_response('https://pl.hd.sohu.com/videolist',
                            params={
                                'playlistid': plid,
                                'order': 0,
                                'cnt': 1,
                                'withLookPoint': 1,
                                'preVideoRule': 3,
                                'ssl': 1,
                                'callback': '__get_videolist',
                                '_': int(time.time() * 1000)
                            }).json()
        mids = [str(v['vid']) for v in data['videos']]
        # list_only is a method and must be called: the bare attribute is a
        # bound method, always truthy, which forced `mid` to None every time.
        mid = None if self.list_only() else (self.mid or None)
        self.set_index(mid, mids)
        return mids

site = TvSohu()
def prepare_mid(self):
    '''Find the media id (object id) of a Weibo video.

    Tries the URL first, then the redirect target's page, and finally the
    canonical weibo.com page; `self.url` is rewritten to that canonical
    page when it has to be fetched.
    Raises AssertionError when no video page can be located.
    '''
    # Raw literals: '\d', '\s', '\W' are invalid escapes in normal strings.
    patterns = (
        r'(?:object|f)_?id"?\s*[=:]\s*"?(\d{4}:(?:\d{16}|\w{32}))\W',
        r'media_id=(\d{16}|\w{32})',
    )
    mid = match1(self.url, r'\D(\d{4}:(?:\d{16}|\w{32}))(?:\W|$)',
                           *patterns)
    if mid:
        return mid

    rurl = get_location(self.url)
    page = match1(rurl, r'https?://[^/]+(/\d+/\w+)')
    if page is None or match1(page, r'/(\d+)$'):
        html = get_content(rurl.replace('//weibo.', '//hk.weibo.')
                               .replace('/user/', '/'))
        mid = match1(html, *patterns)
        if mid:
            return mid
        page = match1(html, r'"og:url".+weibo.com(/\d+/\w+)')

    assert page, 'can not find any video!!!'
    self.url = 'https://weibo.com' + page

    html = get_content(self.url)
    return match1(html, *patterns)
54 | 55 | def append_stream(stream_profile, stream_quality, url): 56 | stream_id = self.quality_2_id[stream_quality] 57 | info.streams[stream_id] = { 58 | 'container': 'mp4', 59 | 'profile': stream_profile, 60 | 'src': [url] 61 | } 62 | 63 | try: 64 | self.mid 65 | except AssertionError: 66 | html = get_content(self.url) 67 | streams = match1(html, 'quality_label_list=([^"]+)').split('&')[0] 68 | if streams: 69 | streams = json.loads(unquote(streams)) 70 | for stream in streams: 71 | stream_quality = stream['quality_label'].upper() 72 | stream_profile = stream['quality_desc'] + ' ' + stream_quality 73 | stream_quality = match1(stream_quality, '(\d+)') 74 | append_stream(stream_profile, stream_quality, stream['url']) 75 | else: 76 | url = match1(html, 'action-data="[^"]+?&video_src=([^"&]+)') 77 | if url: 78 | info.streams['current'] = { 79 | 'container': 'mp4', 80 | 'profile': 'current', 81 | 'src': [unquote(url)] 82 | } 83 | if info.streams: 84 | info.title = match1(html, '<meta content="([^"]+)" name="description"').split('\n')[0] 85 | info.artist = match1(html, '<meta content="([^"]+)" name="keywords"').split(',')[0] 86 | i = info.title.find('】') + 1 87 | if i: 88 | info.title = info.title[:i] 89 | return info 90 | 91 | if ':' not in self.mid: 92 | self.mid = '1034:' + self.mid # oid, the prefix is not necessary and would not be checked 93 | vdata = get_response('https://weibo.com/tv/api/component', 94 | headers={ 95 | 'Referer': 'https://weibo.com/tv/show/' + self.mid 96 | }, 97 | data={ 98 | 'data': json.dumps({ 99 | 'Component_Play_Playinfo': {'oid': self.mid} 100 | }) 101 | }).json()['data']['Component_Play_Playinfo'] 102 | 103 | for stream_profile, url in vdata['urls'].items(): 104 | if url: 105 | stream_quality = match1(stream_profile, '(\d+)') 106 | append_stream(stream_profile, stream_quality, 'https:' + url) 107 | 108 | info.title = vdata['title'] 109 | info.artist = vdata['author'] 110 | info.duration = vdata['duration'] 111 | 
class Yizhibo(Extractor):
    '''Extractor for Yizhibo live streams.'''
    name = 'Yizhibo (一直播)'

    def prepare_mid(self):
        '''Return the last path component of the URL, without extension.'''
        return self.url[self.url.rfind('/')+1:].split('.')[0]

    def prepare(self):
        '''Query the live info API and build the MediaInfo.

        Raises AssertionError when the API reports a result other than 1.
        '''
        info = MediaInfo(self.name)
        info.live = True

        data = get_response(
                'http://www.yizhibo.com/live/h5api/get_basic_live_info',
                params={'scid': self.mid}).json()
        # The check used an undefined name (`content`) and concatenated the
        # result code to a str; both failed before the assertion could report.
        assert data['result'] == 1, f"Error : {data['result']}"
        data = data['data']

        info.title = data['live_title']
        info.artist = data['nickname']
        info.streams['current'] = {
            'container': 'm3u8',
            'profile': 'current',
            'src' : [data['play_url']],
            'size': Infinity
        }
        return info

site = Yizhibo()
class JustFunLive(Extractor):
    '''Extractor for JustFun (zhuafan) live channels.'''
    name = '抓饭直播 (JustFun Live)'

    def prepare_mid(self):
        '''Return the channel id found after "live/" in the URL, or None.'''
        return match1(self.url, r'live/(\d+)')

    def prepare(self):
        '''Fetch channel data from the API, or from the page as a fallback.

        Raises AssertionError with the site's description when the channel
        is not playable.
        '''
        info = MediaInfo(self.name, True)

        try:
            data = get_response(
                    'https://www.zhuafan.tech/live-channel-info/channel/v2/info',
                    params={
                        'cid': self.mid,
                        'decrypt': 1
                    }).json()
        except Exception as e:
            # Best-effort fallback to the state embedded in the page.
            # `Exception` rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            self.logger.debug('API request failed > %r', e)
            html = get_content(self.url)
            data = match1(html, r'window\.__INITIAL_STATE__ = ({.+})</script>')
            self.logger.debug('data:\n%s', data)
            data = json.loads(data)['channel']

        assert data['playStatusCode'] == 0, data['playStatusCodeDesc']

        info.artist = data['uname']
        info.title = data['cname']

        info.streams['OG-FLV'] = {
            'container': 'flv',
            'profile': 'current',
            'src' : [data['httpsPlayInfo']],
            'size': Infinity
        }
        info.streams['OG-HLS'] = {
            'container': 'm3u8',
            'profile': 'current',
            'src' : [data['hlsPlayInfo']],
            'size': Infinity
        }

        return info

site = JustFunLive()
def compress_strip(text, compress='', strip='', translated=False):
    '''Compress runs of the same character, and then strip.

    Params:
        `text`, the string to clean up.
        `compress`, extra characters whose consecutive repeats are
                    collapsed into one.
        `strip`, extra characters removed from both ends.
        `translated`, set it if `text` has already been translated; else
                      it is passed through `translate_table_cs` first.

    Dot, Minus, Underline and Space are always compressed and stripped.
    Returns the cleaned string.
    '''
    if not translated:
        _ensure_translate_table()
        text = text.translate(translate_table_cs)

    compress = set(compress + '.-_ ')
    chars = []
    last_char = None
    for char in text:
        # Compare by value: identity (`is`) only happens to work for the
        # single-character strings CPython caches, so repeated characters
        # outside Latin-1 were never compressed.
        if char == last_char and char in compress:
            continue
        chars.append(char)
        last_char = char
    return ''.join(chars).strip(strip + '.-_ ')
def get_kt_playlist(html):
    '''Collect the MP4 streams found in a KVS player page.

    Returns a MediaStreams keyed by stream id (see `profile_2_id`).
    Raises KeyError for a resolution not listed in `profile_2_id`.
    '''
    streams = MediaStreams()
    for url in get_urls(html):
        # Resolution comes from the file name ("..._720p.mp4"); '480' is
        # used when the name carries none. The dot is escaped so that only
        # a real ".mp4" suffix is accepted.
        stream_profile = (match1(url, r'_(\d+)p?\.mp4') or '480') + 'P'
        stream = profile_2_id[stream_profile]
        streams[stream] = {
            'container': 'mp4',
            'profile': stream_profile,
            'src': [url]
        }
    return streams
class ColorHandler(logging.StreamHandler):
    '''Stream handler which colors records by level on ANSI terminals.'''

    def __init__(self):
        logging.StreamHandler.__init__(self)
        if IS_ANSI_TERMINAL:
            self.formatter = _colorFormatter

    def format(self, recode):
        '''Attach the color of the record's level, then format as usual.'''
        if IS_ANSI_TERMINAL:
            level = recode.levelno
            if level not in _LOG_COLOR_MAP_:
                # Custom levels (e.g. 25) are not in the map; use the color
                # of the nearest standard level below instead of raising
                # KeyError and losing the message.
                level = max((lv for lv in _LOG_COLOR_MAP_ if lv <= level),
                            default=logging.NOTSET)
            recode.color = _LOG_COLOR_MAP_[level]
        return logging.StreamHandler.format(self, recode)
def _load(uri, **kwargs):
    '''Support keyword arguments from m3u8.load().
    Argument "hkwargs" pass on a keyword arguments dict to .http.get_response().

    "headers" found in "hkwargs" are merged into the "headers" argument of
    m3u8.load(). The caller's dicts are never modified, so the same
    arguments can be reused for several requests.
    '''
    http_client = kwargs.get('http_client') or HTTPClient
    hkwargs = kwargs.pop('hkwargs', None)
    if isinstance(http_client, type):
        http_client = http_client()
    if isinstance(http_client, HTTPClient) and hkwargs:
        # Copy first: callers such as live_m3u8() and load_m3u8() pass the
        # very same dict to several _load() calls. Popping from the shared
        # dict stripped the headers from every request after the first.
        hkwargs = dict(hkwargs)
        headers = hkwargs.pop('headers', None)
        if headers:
            kwargs['headers'] = {**(kwargs.get('headers') or {}), **headers}
        http_client.hkwargs = hkwargs
    kwargs['http_client'] = http_client
    return m3u8.load(uri, **kwargs)
def _format_str(pattern, string):
    '''Format the target which will be scanned, makes the worker happy.

    Converts `string` to the type (str or bytes) of `pattern`, using UTF-8
    where text and bytes have to be converted. `pattern` may also be a
    compiled pattern, then the type of its source is used.

    Objects which are neither str nor bytes are read through the first
    working method of getvalue / tobytes / read / encode / decode, else
    they are converted with the target type (integers always via str()).
    '''
    # A compiled pattern keeps its source in `.pattern`
    strtype = type(getattr(pattern, 'pattern', pattern))
    if isinstance(string, strtype):
        return string
    try:
        return strtype(string, 'utf-8')
    except TypeError:
        pass    # not directly convertible, try harder below

    if isinstance(string, bytearray):
        string = bytes(string)
    else:
        for n in ('getvalue', 'tobytes', 'read', 'encode', 'decode'):
            f = getattr(string, n, None)
            if f:
                try:
                    string = f()
                    break
                except Exception:
                    pass
    if not isinstance(string, (str, bytes)):
        if isinstance(string, int):
            # defense memory burst, bytes(n) would allocate n zero bytes
            string = str(string)
        else:
            try:
                string = strtype(string)
            except Exception:
                string = str(string)
    if not isinstance(string, strtype):
        string = strtype(string, 'utf-8')
    return string
def matchall(obj, *patterns):
    '''Collect every match of every pattern in a object.

    Params: same as match()

    Returns a single list with the results of re.findall() for each
    pattern, in the order the patterns were given, or a empty list.
    The items have the shape re.findall() gives them, so all the patterns
    MUST use a similar capture mode if a uniform list is expected.
    '''
    return [found
            for pattern in patterns
            for found in re.findall(pattern, _format_str(pattern, obj))]
def _get1(l):
    '''Unpack a standalone element from a list.

    Returns the only element of a one-element list, unless that element is
    a dict (kept wrapped, contribute to compatibility). Everything else is
    returned unchanged.
    '''
    if not isinstance(l, list) or len(l) != 1:
        return l
    only = l[0]
    return l if isinstance(only, dict) else only
86 | else: 87 | k = '' 88 | xmlns[k] = v 89 | else: 90 | attrs['@' + k] = _convert(v) 91 | if xmlns: 92 | attrs['@xmlns'] = xmlns 93 | return attrs 94 | 95 | def startRoot(name, attributes): 96 | xml['rootname'] = name 97 | parent_nodes.append(root) 98 | if attributes: 99 | root.update(sortAttributes(attributes)) 100 | 101 | def startElement(name, attributes): 102 | if not parent_nodes: 103 | return startRoot(name, attributes) 104 | node = sortAttributes(attributes) 105 | node['#text'] = [] 106 | parent_node = parent_nodes[-1] 107 | if name not in parent_node: 108 | parent_node[name] = [] 109 | parent_node[name].append(node) 110 | parent_nodes.append(node) 111 | 112 | def startCDATA(): 113 | nonlocal isCDATA 114 | isCDATA = True 115 | 116 | def endCDATA(): # void handle to skip default 117 | pass 118 | 119 | def characters(data): 120 | data = data.strip() 121 | if data: 122 | parent_nodes[-1]['#text'].append(data) 123 | 124 | def endElement(name): 125 | 126 | def replaceNode(data): 127 | parent_node = parent_nodes[-1][name] 128 | assert parent_node.pop() is node, 'Unkown error during endElement()' 129 | parent_node.append(data) 130 | 131 | nonlocal isCDATA 132 | node = parent_nodes.pop() 133 | if node.get('@xsi:nil') in (True, 1) and \ 134 | getNSPrefix('xsi') == xml_schema_instance: 135 | replaceNode(None) 136 | else: 137 | text = node.pop('#text') 138 | if node: 139 | node.update({k: _get1(v) for k, v in node.items()}) 140 | if text: 141 | name = parent_nodes and name or None 142 | if name and not node and len(text) == 1 and not isCDATA: 143 | data = _convert(text[0]) # no attributes & sub-elements 144 | else: 145 | data = '\n'.join(text) # prefer a string than a list 146 | if name and not node: 147 | replaceNode(data) # no attributes & sub-elements 148 | else: 149 | node['#text'] = data 150 | elif not node: 151 | replaceNode('') # placeholder use to keep the structure 152 | isCDATA = False # ends here, not CDATA's end 153 | 154 | parser.DefaultHandler = default 
155 | parser.XmlDeclHandler = startXML 156 | parser.StartElementHandler = startElement 157 | parser.EndElementHandler = endElement 158 | parser.StartCdataSectionHandler = startCDATA 159 | parser.EndCdataSectionHandler = endCDATA 160 | parser.CharacterDataHandler = characters 161 | 162 | if isinstance(source, str): 163 | parser.Parse(source, True) 164 | elif hasattr(source, 'read'): 165 | parser.ParseFile(source) 166 | else: 167 | for s in source: 168 | parser.Parse(s, False) 169 | parser.Parse(type(s)(), True) 170 | return xml 171 | -------------------------------------------------------------------------------- /ykdl/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.8.3.dev' 2 | --------------------------------------------------------------------------------