├── Mooc ├── Icourse163 │ ├── __init__.py │ ├── Icourse163_Config.py │ ├── Icourse163_Base.py │ └── Icourse163_Mooc.py ├── Icourses │ ├── __init__.py │ ├── Icourse_Config.py │ ├── Icourse_Base.py │ ├── Icourse_Cuoc.py │ └── Icourse_Mooc.py ├── __main__.py ├── Mooc.ico ├── Alipay.jpg ├── aria2c.exe ├── Mooc_Main.py ├── __init__.py ├── Mooc_Potplayer.py ├── Mooc_Config.py ├── Mooc_Request.py ├── Mooc_Download.py ├── Mooc_Interface.py └── Mooc_Base.py ├── 图片 ├── copy1.png ├── copy2.png ├── demo1.png ├── demo2.png ├── help1.png ├── QQ_group.jpg └── package.png ├── .gitignore ├── Mooc.spec ├── README.md └── HELP.md /Mooc/Icourse163/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mooc/Icourses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Mooc/__main__.py: -------------------------------------------------------------------------------- 1 | from Mooc.Mooc_Main import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /Mooc/Mooc.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/Mooc/Mooc.ico -------------------------------------------------------------------------------- /图片/copy1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/copy1.png -------------------------------------------------------------------------------- /图片/copy2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/copy2.png -------------------------------------------------------------------------------- /图片/demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/demo1.png -------------------------------------------------------------------------------- /图片/demo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/demo2.png -------------------------------------------------------------------------------- /图片/help1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/help1.png -------------------------------------------------------------------------------- /Mooc/Alipay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/Mooc/Alipay.jpg -------------------------------------------------------------------------------- /Mooc/aria2c.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/Mooc/aria2c.exe -------------------------------------------------------------------------------- /图片/QQ_group.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/QQ_group.jpg -------------------------------------------------------------------------------- /图片/package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PyJun/Mooc_Downloader/HEAD/图片/package.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | Mooc/__pycache__ 3 | Mooc/Icourse163/__pycache__ 4 | Mooc/Icourses/__pycache__ 5 | build 6 | dist -------------------------------------------------------------------------------- /Mooc/Icourse163/Icourse163_Config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Icourse163 模块包的配置文件 3 | ''' 4 | 5 | COURSENAME = '{1}--课程' 6 | IS_SHD, IS_HD, IS_SD, ONLY_PDF = 1, 2, 3, 4 7 | LEN_S = 96 -------------------------------------------------------------------------------- /Mooc/Icourses/Icourse_Config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Icourse 模块包的配置文件 3 | ''' 4 | 5 | 6 | COURSENAME = '{1}--课程' 7 | PAPERNAME = '{2}--试卷' 8 | SOURCENAME = '{3}--资源' 9 | IS_MP4, IS_PDF, IS_PAPER, IS_SOURCE = 1, 2, 4, 8 10 | LEN_S = 96 11 | LEN_ = 48 -------------------------------------------------------------------------------- /Mooc/Mooc_Main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc下载器主程序 3 | 4 | 作者:PyJun 5 | 邮箱:py.jun@qq.com 6 | ''' 7 | 8 | if __package__ is None: 9 | import sys 10 | sys.path.append('.\\') 11 | sys.path.append('..\\') 12 | from Mooc.Mooc_Interface import * 13 | 14 | def main(): 15 | try: 16 | mooc_interface() 17 | except: 18 | pass 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /Mooc/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc下载器版本 3.4.1 3 | 作者 PyJun 4 | 邮箱 py.jun@qq.com 5 | github https://github.com/PyJun/Mooc_Downloader 6 | 博客 https://blog.csdn.net/qq_16166591/article/details/85249743 7 | ''' 8 | 9 | __version__ = 'Mooc-3.4.1' 10 | __author__ = 'PyJun' 11 | __email__ = 'py.jun@qq.com' 12 | __github__ = 'https://github.com/PyJun/Mooc_Downloader' 13 | __blog__ = 'https://blog.csdn.net/qq_16166591/article/details/85249743' 14 | -------------------------------------------------------------------------------- /Mooc.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | 3 | block_cipher = None 4 | 5 | 6 | a = Analysis(['Mooc\\Mooc_Main.py'], 7 | pathex=['.'], 8 | binaries=[], 9 | datas=[ 10 | ('Mooc\\aria2c.exe', '.'), 11 | ('Mooc\\Alipay.jpg', '.') 12 | ], 13 | hiddenimports=[], 14 | hookspath=[], 15 | runtime_hooks=[], 16 | excludes=[], 17 | win_no_prefer_redirects=False, 18 | win_private_assemblies=False, 19 | cipher=block_cipher, 20 | noarchive=False) 21 | pyz = PYZ(a.pure, a.zipped_data, 22 | cipher=block_cipher) 23 | exe = EXE(pyz, 24 | a.scripts, 25 | a.binaries, 26 | a.zipfiles, 27 | a.datas, 28 | [], 29 | name='Mooc-3.4.2', 30 | debug=False, 31 | bootloader_ignore_signals=False, 32 | strip=False, 33 | upx=True, 34 | runtime_tmpdir=None, 35 | console=True , icon='Mooc\\Mooc.ico') 36 | -------------------------------------------------------------------------------- /Mooc/Icourse163/Icourse163_Base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Icourse163 抽象基类 3 | ''' 4 | 5 | import os 6 | if __package__ is None: 7 | import sys 8 | sys.path.append('../') 9 | from Mooc.Mooc_Base import * 10 | from Mooc.Mooc_Download import * 11 | from Mooc.Mooc_Request import * 12 | from Mooc.Mooc_Potplayer import * 13 | 14 | __all__ = [ 15 | "Icourse163_Base" 16 | ] 17 | 18 | class Icourse163_Base(Mooc_Base): 19 | potplayer = Mooc_Potplayer() 20 | 21 | def __init__(self): 22 | super().__init__() 23 | self.infos = {} # 课程视频和文件的链接请求信息,包含id等 24 | self.__term_id = None # 下载课程的标题 ID 25 | 26 | @property 27 | def term_id(self): 28 | return self.__term_id 29 | 30 | @term_id.setter 31 | def term_id(self, term_id): 32 | self.__term_id = term_id 33 | 34 | def set_mode(self): 35 | while True: 36 | try: 37 | instr = input("请输入一个0-4的数选择性下载内容(1:超高清, 2:高清, 3:标清, 4:仅下载课件) [0退出]: ") 38 | if not instr: 39 | continue 40 | try: 41 | innum = int(instr) 42 | if innum == 0: 43 | return False 44 | elif 1 <= innum <= 4: 45 | self.mode = innum 46 | return True 47 | else: 48 | print("请输入一个0-4之间的整数!") 49 | continue 50 | except ValueError: 51 | print("请输入一个0-4之间的整数!") 52 | except KeyboardInterrupt: 53 | pass 54 | 55 | @classmethod 56 | @potplayer 57 | def download_video(cls, video_url, video_name, video_dir): 58 | return super().download_video(video_url, video_name, video_dir) 59 | -------------------------------------------------------------------------------- /Mooc/Mooc_Potplayer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 生成 potplayer 播放列表 dpl 文件的类 3 | ''' 4 | 5 | import os 6 | from functools import wraps 7 | from Mooc.Mooc_Config import * 8 | 9 | __all__ = [ 10 | "Mooc_Potplayer" 11 | ] 12 | 13 | class Mooc_Potplayer(): 14 | def __init__(self): 15 | self.cnt = 0 16 | self.lines = [] 17 | self.available = False 18 | 19 | def init(self, rootdir): 20 | self.rootdir = rootdir 21 | self.listpath = os.path.join(rootdir, PLAYLIST) 22 | self.listpath_back = os.path.join(rootdir, PALYBACK) 23 | self.batpath = os.path.join(rootdir, BATNAME) 24 | 25 | def __call__(self, func): 26 | @wraps(func) 27 | def wrap_func(*args, **kwargs): 28 | succeed = func(*args, **kwargs) 29 | if self.available and succeed: 30 | self.cnt += 1 31 | video_dir = kwargs['video_dir'] 32 | video_name = kwargs['video_name'] 33 | video_path = os.path.join(video_dir, video_name+'.mp4') 34 | video_relpath = os.path.relpath(video_path, self.rootdir) 35 | if self.lines == [] and self.cnt == 1: 36 | self.lines.append('DAUMPLAYLIST\n') 37 | self.lines.append("playname=%s\n"%(video_relpath)) 38 | with open(self.batpath, 'w') as batfile: 39 | batfile.write(BATSTRING) 40 | self.lines.append("%d*file*%s\n"%(self.cnt,video_relpath)) 41 | self.lines.append("%d*title*%s\n"%(self.cnt,video_name)) 42 | self.update() 43 | return succeed 44 | return wrap_func 45 | 46 | def update(self): 47 | with open(self.listpath, 'w', encoding='utf8') as listfile: 48 | listfile.writelines(self.lines) 49 | with open(self.listpath_back, 'w', encoding='utf8') as listfile: 50 | listfile.writelines(self.lines) 51 | 52 | def enable(self): 53 | self.cnt = 0 54 | self.lines = [] 55 | self.available = True 56 | 57 | def disable(self): 58 | self.available = False 59 | -------------------------------------------------------------------------------- /Mooc/Mooc_Config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 总项目的配置文件 3 | ''' 4 | 5 | import sys 6 | import os 7 | import re 8 | 9 | 10 | # 常量,固定参数 11 | __QQgroup__ = "196020837" 12 | __email__ = "py.jun@qq.com" 13 | if hasattr(sys, 'frozen'): 14 | PATH = os.path.dirname(sys.executable) 15 | else: 16 | PATH = os.path.dirname(os.path.abspath(__file__)) # 程序当前路径 17 | winre = re.compile(r'[?*|<>:"/\\\s]') # windoes 文件非法字符匹配 18 | WIN_LENGTH = 64 19 | TIMEOUT = 60 # 请求超时时间 20 | PLAYLIST = '播放列表.dpl' 21 | PALYBACK = 'DPL_PYJUN' 22 | BATNAME = '修复播放列表.bat' 23 | BATSTRING = '''\ 24 | @echo off 25 | copy {0} {1} 26 | echo 成功修复“{1}” 27 | echo 请用Potplayer播放器打开“{1}”观看视频(未安装Potplayer自行百度下载安装) 28 | pause 29 | '''.format(PALYBACK, PLAYLIST) 30 | LENGTH = 80 31 | 32 | # 变量,可修改的参数 33 | download_speed = "0" 34 | if getattr(sys, 'frozen', False): #是否打包 35 | aria2_path = os.path.join(sys._MEIPASS, "aria2c.exe") 36 | alipay_path = os.path.join(sys._MEIPASS, "Alipay.jpg") 37 | else: 38 | aria2_path = os.path.join(PATH, "aria2c.exe") 39 | alipay_path = os.path.join(PATH, "Alipay.jpg") 40 | aira2_cmd = '%s --header "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 -- fUcIvJ01pZVQhNq23lXm9gjazkeonsCx" --check-certificate=false -x 16 -s 64 -j 64 -k 2M --disk-cache 128M --max-overall-download-limit %s "{url:}" -d "{dirname:}" -o "{filename:}"'%(aria2_path, download_speed) 41 | 42 | # 课程链接的正则匹配 43 | courses_re = { 44 | "icourse163_mooc": re.compile(r'\s*https?://www.icourse163.org/((learn)|(course))/(.*?)(#/.*)?$'), 45 | "icourse_cuoc": re.compile(r'\s*https?://www.icourses.cn/web/sword/portal/videoDetail\?courseId=([\w-]*)'), 46 | "icourse_mooc": re.compile(r'\s*((https?://www.icourses.cn/sCourse/course_(\d+).html)|' 47 | r'(https?://www.icourses.cn/web/sword/portal/shareDetails\?cId=(\d+)))') 48 | } 49 | 50 | __all__ = [ 51 | "__QQgroup__", "__email__", "PATH", "winre", "TIMEOUT", "PLAYLIST", "PALYBACK", 52 | "BATNAME", "BATSTRING", "LENGTH", "WIN_LENGTH", 53 | 54 | "download_speed", "aria2_path", "aira2_cmd", "courses_re", "alipay_path" 55 | ] 56 | -------------------------------------------------------------------------------- /Mooc/Icourses/Icourse_Base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 定义一个爱课程 Icourse 的虚基类 3 | 用于派生 Icourse_Cuoc 和 Icourse_Mooc 4 | ''' 5 | 6 | import os 7 | from abc import abstractmethod 8 | if __package__ is None: 9 | import sys 10 | sys.path.append('../') 11 | from Mooc.Mooc_Config import * 12 | from Mooc.Mooc_Base import * 13 | from Mooc.Mooc_Download import * 14 | from Mooc.Mooc_Request import * 15 | from Mooc.Mooc_Potplayer import * 16 | 17 | __all__ = [ 18 | "Icourse_Base" 19 | ] 20 | 21 | class Icourse_Base(Mooc_Base): 22 | potplayer = Mooc_Potplayer() 23 | 24 | def __init__(self): 25 | super().__init__() 26 | self.__infos = [] 27 | self.__cid = None 28 | 29 | def prepare(self, url): 30 | getattr(self, "_get_cid")(url) 31 | getattr(self, "_get_title")() 32 | getattr(self, "_get_infos")() 33 | 34 | def download(self): 35 | if self.cid and self.title and self.infos: 36 | getattr(self, "_download")() 37 | return True 38 | return False 39 | 40 | @property 41 | def cid(self): 42 | return self.__cid 43 | 44 | @cid.setter 45 | def cid(self, cid): 46 | self.__cid = cid 47 | 48 | @abstractmethod 49 | def _get_cid(self, url): 50 | pass 51 | 52 | def set_mode(self): 53 | return True 54 | 55 | @classmethod 56 | @potplayer 57 | def download_video(cls, video_url, video_name, video_dir): 58 | return super().download_video(video_url, video_name, video_dir) 59 | 60 | @classmethod 61 | def download_video_list(cls, dirpath, mp4list, prefix=''): 62 | for cnt, videos in enumerate(mp4list,1): 63 | mp4_url, mp4_name = videos 64 | mp4_name = winre.sub('', '['+prefix+str(cnt)+']--'+mp4_name).rstrip('.mp4')[:WIN_LENGTH] 65 | cls.download_video(video_url=mp4_url, video_name=mp4_name, video_dir=dirpath) 66 | 67 | @classmethod 68 | def download_pdf_list(cls, dirpath, pdflist, prefix=''): 69 | for cnt, pdfs in enumerate(pdflist,1): 70 | pdf_url, pdf_name = pdfs 71 | pdf_name = winre.sub('', '('+prefix+str(cnt)+')--'+pdf_name).rstrip('.pdf')[:WIN_LENGTH] 72 | cls.download_pdf(pdf_url, pdf_name, dirpath) 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 基于Python 爬虫的慕课视频下载【开源代码停止维护,软件仍在维护更新】 2 | 3 | ##### 1. 项目简介: 4 | 5 | - 项目环境为 Windows10, Python3 6 | - 用 Python3.6 urllib3 模块爬虫,涉及模块包括标准库、三方库和其它开源组件,已打包成exe文件 7 | - 支持Mooc视频,字幕,课件下载,课程以目录树形式下载到硬盘,支持Potplayer播放 8 | - 支持中国大学,网易云课堂,网易公开课,有道精品课,有道领世,启航教育,腾讯课堂,腾讯会议,钉钉,飞书,中公网校,荔枝微课,海豚知道,伯索云学堂,爱问云,百家云,学浪,抖音课堂,B站课堂,希望学,希望学素养,希望优课,研途考研,高途,途途,高途高中规划,高途素养,千聊,兴趣岛,橙啦,爱课程,学堂在线,超星学习通(学银在线),知到智慧树,智慧职教,华尔街学堂,等网课的视频课程下载,核心下载调用 Aria2c 9 | - 用户可以直接下载 Release 下的 [学无止下载器](https://github.com/PyJun/Mooc_Downloader/releases) 安装即可使用 10 | - 有关下载器的使用以及相关问题,点击查看[Mooc下载器帮助文档](https://github.com/PyJun/Mooc_Downloader/wiki) 11 | 12 | ##### 2. 功能演示: 13 | 14 | ![demo1.png](http://xuewuzhi.cn/images/demo1.png) 15 | 16 | ![demo2.png](http://xuewuzhi.cn/images/demo2.png) 17 | 18 | ##### 4.项目文件 19 | 20 | - Mooc_Main.py 整个项目的主程序, 其实是调用了 Mooc_Interface 21 | - Mooc_Interface.py 人机交互接口模块 22 | - Mooc_Config.py Mooc 的配置文件 23 | - Mooc_Base.py Mooc 抽象基类 24 | - Mooc_Potplayer.py 用于生成专用于 Potplayer 播放的 dpl 文件 25 | - Mooc_Request.py 用 urllib 包装的一个Mooc请求库 26 | - Mooc_Download.py 调用 Aira2c 下载的命令接口 27 | - Icourses 有关爱课程的模块包 28 | - Icourse_Base.py 爱课程下载器的基类,继承自 Mooc_Base 29 | - Icourse_Config.py 配置文件 30 | - Icourse_Cuoc.py 爱课程视频公开课的下载的子类,http://www.icourses.cn/cuoc/ 31 | - Icourse_Mooc.py 爱课程资源共享课的下载的子类,http://www.icourses.cn/mooc/ 32 | 33 | - Icourse163 有关中国大学慕课的模块包 34 | - Icourse163_Base.py 中国大学慕课下载器的基类,继承自 Mooc_Base 35 | - Icourse163_Config.py 配置文件 36 | - Icourse163_Mooc.py 中国大学慕课下载器得子类,继承自 Icourse163_Base.py 37 | 38 | ##### 5.运行项目 39 | 40 | 请确保在项目工程的根目录下,然后在终端输入以下指令(python3 环境,无依赖的第三方模块) 41 | 42 | ```powershell 43 | python -m Mooc 44 | ``` 45 | 46 | ##### 6.打包指令 47 | 48 | 1. 首先确保已经安装 **pyinstaller**,若未安装,则用 pip 安装,打开终端,输入: 49 | 50 | ```powershell 51 | pip install pyinstaller 52 | ``` 53 | 54 | 2. 然后在项目工程的根目录下,终端输入: 55 | 56 | ```powershell 57 | pyinstaller Mooc.spec 58 | ``` 59 | 60 | 3. 最后会在项目工程根目录下出现一个**dist**文件夹,该文件夹会出现一个**Mooc-3.4.0.exe**程序 61 | 62 | ![package.png](http://xuewuzhi.cn/images/package.png) 63 | 64 | 65 | ##### 7.注意事项 66 | 项目代码已好久未更新,Releases下有我打包好的exe文件,可直接下载使用~ 67 | 【该项目为早期开源的代码,最新版本代码未开源】 68 | 1. 新版代码涉及网站爬虫、解析、解密,开源后容易和谐失效 69 | 2. 新版本涉及太多的模块依赖(包括且不限于nodejs,electron,ariac2,annie,ffmpeg,wkhtmltopdf和一些自编译的python依赖库),难以分离出可独立可用的开源版 70 | 3. 实在没有精力同时维护二个开源和闭源版本的代码 71 | 4. 该项目并非完整的开源项目,提供的软件无病毒,可免费使用(也包含付费功能) 72 | -------------------------------------------------------------------------------- /HELP.md: -------------------------------------------------------------------------------- 1 | ### 学无止网课下载器帮助文档 2 | 3 | 4 | 5 | #### 一. 软件下载: 6 | 7 | ------ 8 | 9 | 1. [Github Releases](https://github.com/PyJun/Mooc_Downloader/releases) 10 | 2. [百度云链接](https://pan.baidu.com/s/1G43ZZCTc5XtYCeZWUs4uTA) 11 | 3. [蓝奏云](https://lanzouw.com/b00n4ln4b) 12 | 13 | 14 | 15 | #### 二. 使用说明 16 | 17 | ------ 18 | 19 | ##### 1.从课程官网下选择任意一个课程复制其网址,如下图: 20 | 21 | ![](http://118.31.48.9/images/downloader/copy1.png) 22 | 23 | ![](http://118.31.48.9/images/downloader/copy2.png) 24 | 25 | 26 | 27 | ##### 2.然后粘贴到下载器中,并按要求输入指令,会自动下载相应课程的视频和课件,如下图: 28 | 29 | ![](http://118.31.48.9/images/downloader/demo1.png) 30 | 31 | ![](http://118.31.48.9/images/downloader/demo2.png) 32 | 33 | 34 | 35 | #### 三.常见问题 36 | 37 | ------ 38 | 39 | ##### 1.学无止下载器支持哪些网站的视频下载? 40 | 41 | 答:目前已支持以下网课网站视频课程下载,官方网址如下: 42 | 43 | 1. [腾讯课堂](https://ke.qq.com/) 44 | 2. [网易云课堂](https://study.163.com/) 45 | 3. [有道精品课](https://ke.youdao.com/) 46 | 4. [有道领世](https://c.youdao.com/ydls/pc-download.html) 47 | 5. [高途课堂](https://www.gaotu.cn/) 48 | 6. [途途课堂](https://gaotu100.com/) 49 | 7. [高途高中规划](https://www.gtgz.cn/) 50 | 8. [中国大学](https://www.icourse163.org/) 51 | 9. [哔哩哔哩](https://www.bilibili.com/) 52 | 10. [抖音课堂](https://www.xuelangapp.com/) 53 | 11. [中公网校](https://www.eoffcn.com/) 54 | 12. [新东方在线](https://www.koolearn.com/) 55 | 13. [新东方云教室](https://roombox.xdf.cn/) 56 | 14. [伯索云学堂](https://www.plaso.cn/app/) 57 | 15. [橙啦](https://www.orangevip.com/) 58 | 16. [千聊](https://www.qlchat.com/) 59 | 17. [兴趣岛](https://m.qianliao.net/) 60 | 18. [超星学习通(学银在线)](http://www.xueyinonline.com/) 61 | 19. [知到智慧树](https://www.zhihuishu.com/) 62 | 20. [智慧职教(职教云)](https://www.icve.com.cn/) 63 | 21. [爱课程](http://www.icourses.cn/) 64 | 22. [学堂在线](https://next.xuetangx.com/) 65 | 66 | 67 | 有关更多慕课网站的课程下载,敬请期待 68 | 69 | ##### 2.关于课程还未开课无法下载, 或者正在开课无法下载全部课程问题? 70 | 71 | 答:若课程有前几次开课,选择图片的版本一般选择最近一次的开课,然后复制链接进行下载。 72 | PS:关于每次开课内容一般大致相同,新课程可能会有少量更新等。一般不会影响学习。 73 | 74 | ![图片](http://118.31.48.9/images/downloader/help1.png) 75 | 76 | ------ 77 | 78 | ##### 3.是否可以下载已经结束的课程? 79 | 80 | 答:丝毫没影响,依然可以下载,直接复制链接到下载器中下载即可 81 | 82 | ##### 4.收费视频可以下载吗? 83 | 84 | 答:因为版权问题,未购买的收费视频该软件不提供下载 85 | 86 | ##### 5. “播放列表.dpl” 文件有什么用? 87 | 88 | 答:电脑下载安装 **Potplayer** 播放器后,然后右键用potplayer打开“播放列表.dpl”文件, 89 | 90 | 即可顺序播放器所有视频,可以更方便的观看 91 | 92 | ##### 6.“修复播放列表.bat” 文件有什么用? 93 | 94 | 答:当你手动把整个课程目录拷贝到其它地方后,你会发现 “播放列表.dpl”文件会失效,这时可以通过双击“修复播放列表.bat”文件来修复它。 95 | 96 | ##### 7.软件出现请求异常和下载异常的问题? 97 | 98 | 答:一般情况是你的本地网络出现的问题,请检查网络是否正常连接。如果确认网络良好还是出现了这样的问题,那么欢迎反馈给我们 99 | 100 | -------------------------------------------------------------------------------- /Mooc/Mooc_Request.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 的请求模块:包含 get, post, head 常用的三大请求 3 | ''' 4 | 5 | from time import sleep 6 | from functools import wraps 7 | from socket import timeout, setdefaulttimeout 8 | from urllib import request, parse 9 | from urllib.error import ContentTooShortError, URLError, HTTPError 10 | from Mooc.Mooc_Config import * 11 | 12 | __all__ = [ 13 | 'RequestFailed', 'request_get', 'request_post', 'request_head', 'request_check' 14 | ] 15 | 16 | headers = ("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36") #这里模拟浏览器 17 | opener = request.build_opener() 18 | opener.addheaders = [headers] 19 | request.install_opener(opener) 20 | setdefaulttimeout(TIMEOUT) 21 | 22 | class RequestFailed(Exception): 23 | pass 24 | 25 | def request_decorate(count=3): 26 | def decorate(func): 27 | @wraps(func) 28 | def wrap_func(*args, **kwargs): 29 | cnt = 0 30 | while True: 31 | try: 32 | return func(*args, **kwargs) 33 | except (ContentTooShortError, URLError, HTTPError, ConnectionResetError): 34 | cnt += 1 35 | if cnt >= count: 36 | break 37 | sleep(0.32) 38 | except (timeout): 39 | break 40 | raise RequestFailed("request failed") 41 | return wrap_func 42 | return decorate 43 | 44 | @request_decorate() 45 | def request_get(url, decoding='utf8'): 46 | '''get请求''' 47 | req = request.Request(url=url) 48 | response = request.urlopen(req, timeout=TIMEOUT) 49 | text = response.read().decode(decoding) 50 | response.close() 51 | return text 52 | 53 | @request_decorate() 54 | def request_post(url, data, decoding='utf8'): 55 | '''post请求''' 56 | data = parse.urlencode(data).encode('utf8') 57 | req = request.Request(url=url, data=data, method='POST') 58 | response = request.urlopen(req, timeout=TIMEOUT) 59 | text = response.read().decode(decoding) 60 | response.close() 61 | return text 62 | 63 | @request_decorate() 64 | def request_head(url): 65 | '''head请求''' 66 | req = request.Request(url=url); 67 | response = request.urlopen(req, timeout=TIMEOUT) 68 | header = dict(response.getheaders()) 69 | response.close() 70 | return header 71 | 72 | @request_decorate(1) 73 | def request_check(url): 74 | '''检查url是否可以访问''' 75 | req = request.Request(url=url); 76 | response = request.urlopen(req, timeout=TIMEOUT//10) 77 | response.close() 78 | -------------------------------------------------------------------------------- /Mooc/Mooc_Download.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 下载功能模块:调用 aria2c.exe 下载文件 3 | ''' 4 | 5 | import os 6 | import re 7 | import subprocess 8 | from time import sleep 9 | from Mooc.Mooc_Config import * 10 | 11 | __all__ = [ 12 | "aria2_download_file", "DownloadFailed" 13 | ] 14 | 15 | RE_SPEED = re.compile(r'\d+MiB/(\d+)MiB\((\d+)%\).*?DL:(\d*?\.?\d*?)([KM])iB') 16 | RE_AVESPEED = re.compile(r'\|\s*?([\S]*?)([KM])iB/s\|') 17 | 18 | class DownloadFailed(Exception): 19 | pass 20 | 21 | def aria2_download_file(url, filename, dirname='.'): 22 | cnt = 0 23 | while cnt < 3: 24 | p = None 25 | try: 26 | cmd = aira2_cmd.format(url=url, dirname=dirname, filename=filename) 27 | p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True, encoding='utf8') 28 | lines = '' 29 | while p.poll() is None: 30 | line = p.stdout.readline().strip() 31 | if filename.endswith('.mp4') and line: 32 | lines += line 33 | match = RE_SPEED.search(line) 34 | if match: 35 | size, percent, speed, unit = match.groups() 36 | percent = float(percent) 37 | speed = float(speed) 38 | if unit == 'K': 39 | speed /= 1024 40 | per = min(int(LENGTH*percent/100) , LENGTH) 41 | print('\r |-['+per*'*'+(LENGTH-per)*'.'+'] {:.0f}% {:.2f}M/s'.format(percent,speed),end=' (ctrl+c中断)') 42 | if p.returncode != 0: 43 | cnt += 1 44 | if cnt==1: 45 | clear_files(dirname, filename) 46 | sleep(0.16) 47 | else: 48 | if filename.endswith('.mp4'): 49 | match = RE_AVESPEED.search(lines) 50 | if match: 51 | ave_speed, unit = match.groups() 52 | ave_speed = float(ave_speed) 53 | if unit == 'K': 54 | ave_speed /= 1024 55 | print('\r |-['+LENGTH*'*'+'] {:.0f}% {:.2f}M/s'.format(100,ave_speed),end=' (完成) \n') 56 | return 57 | finally: 58 | if p: 59 | p.kill() # 保证子进程已终止 60 | clear_files(dirname, filename) 61 | raise DownloadFailed("download failed") 62 | 63 | 64 | def clear_files(dirname, filename): 65 | filepath = os.path.join(dirname, filename) 66 | if os.path.exists(filepath): 67 | os.remove(filepath) 68 | if os.path.exists(filepath+'.aria2'): 69 | os.remove(filepath+'.aria2') 70 | -------------------------------------------------------------------------------- /Mooc/Icourses/Icourse_Cuoc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | www.icourses.cn/cuoc/ 下的视频公开课下载解析 3 | ''' 4 | 5 | import os 6 | import re 7 | import json 8 | if __package__ is None: 9 | import sys 10 | sys.path.append('..\\') 11 | sys.path.append('..\\..\\') 12 | from Mooc.Mooc_Config import * 13 | from Mooc.Mooc_Request import * 14 | from Mooc.Icourses.Icourse_Config import * 15 | from Mooc.Icourses.Icourse_Base import * 16 | 17 | __all__ = [ 18 | "Icourse_Cuoc" 19 | ] 20 | 21 | class Icourse_Cuoc(Icourse_Base): 22 | url_course = "http://www.icourses.cn/web/sword/portal/videoDetail?courseId=" 23 | def __init__(self): 24 | super().__init__() 25 | 26 | def _get_cid(self, url): 27 | self.cid = None 28 | match = courses_re.get('icourse_cuoc').match(url) 29 | if match: 30 | self.cid = match.group(1) 31 | 32 | def _get_title(self): 33 | if self.cid is None: 34 | return 35 | self.title = None 36 | url = self.url_course + self.cid 37 | text = request_get(url) 38 | match_title = re.search(r"_courseTitle.*?=.*?'(.*?)';", text) 39 | match_school = re.search(r'(.*?)', text) 40 | if match_title and match_school: 41 | title_name = match_title.group(1)+'__'+match_school.group(1) 42 | self.title = winre.sub('', title_name)[:WIN_LENGTH] 43 | 44 | def _get_infos(self): 45 | if self.cid is None: 46 | return 47 | self.infos = [] 48 | url = self.url_course + self.cid 49 | text = request_get(url) 50 | match_courses = re.search(r'_sourceArrStr *?= *?(\[.*?\]);\s*?var +?_shareUrl', text) 51 | if match_courses: 52 | #!!! except json.decoder.JSONDecodeError 53 | courses = json.loads(match_courses.group(1)) 54 | self.infos = [{'url':course['fullLinkUrl'], 'name':winre.sub('',course['title'])[:WIN_LENGTH]} for course in courses] 55 | 56 | def _download(self): 57 | print('\n{:^{}s}'.format(self.title, LEN_S)) 58 | self.rootDir = rootDir = os.path.join(PATH, self.title) 59 | courseDir = os.path.join(rootDir, COURSENAME) 60 | if not os.path.exists(courseDir): 61 | os.makedirs(courseDir) 62 | print(COURSENAME) 63 | Icourse_Base.potplayer.init(rootDir) 64 | mp4_list = [(info['url'], info['name']) for info in self.infos] 65 | Icourse_Base.potplayer.enable() 66 | self.download_video_list(courseDir, mp4_list) 67 | 68 | 69 | def main(): 70 | # url = 'http://www.icourses.cn/web/sword/portal/videoDetail?courseId=9fe9d456-1327-1000-9193-4876d02411f6' 71 | url = 'http://www.icourses.cn/web/sword/portal/videoDetail?courseId=9fe99900-1327-1000-9191-4876d02411f6#/?resId=d0fff67d-1334-1000-8f6b-1d109e90c3cf' 72 | # url = 'http://www.icourses.cn/web/sword/portal/videoDetail?courseId=9feeeee3-1327-1000-91e3-4876d02411f6#/?resId=d119afd8-1334-1000-9042-1d109e90c3cf' 73 | icourse_cuoc = Icourse_Cuoc() 74 | icourse_cuoc.prepare(url) 75 | icourse_cuoc.download() 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /Mooc/Mooc_Interface.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 人机交互的接口函数 3 | ''' 4 | 5 | import os 6 | import re 7 | if __package__ is None: 8 | import sys 9 | sys.path.append('.\\') 10 | sys.path.append("..\\") 11 | from Mooc.Mooc_Config import * 12 | from Mooc.Mooc_Request import * 13 | from Mooc.Mooc_Download import * 14 | from Mooc.Icourse163.Icourse163_Mooc import * 15 | from Mooc.Icourses.Icourse_Cuoc import * 16 | from Mooc.Icourses.Icourse_Mooc import * 17 | 18 | __all__ = [ 19 | "mooc_interface" 20 | ] 21 | 22 | # 课程名对应的Mooc类 23 | courses_mooc = { 24 | "icourse163_mooc": Icourse163_Mooc, 25 | "icourse_cuoc": Icourse_Cuoc, 26 | "icourse_mooc": Icourse_Mooc 27 | } 28 | 29 | def mooc_interface(): 30 | try: 31 | while True: 32 | os.system("cls") 33 | print("\t"+"="*91) 34 | print('\t|\t\t 慕课下载器(免费版v3.4.2) \tQQ群: {:^27s} |'.format(__QQgroup__)) 35 | print("\t|\t\t icourse163.org, icourses.cn \t邮箱: {:^27s} |".format(__email__)) 36 | print("\t"+"="*91) 37 | print("\t{:^90}".format("Github: https://github.com/PyJun/Mooc_Downloader")) 38 | print("\t{:^90}".format("博客: https://blog.csdn.net/qq_16166591/article/details/85249743")) 39 | print("\t{:^90}".format("下载路径: "+PATH)) 40 | urlstr = None 41 | while not urlstr: 42 | try: 43 | urlstr = input('\n输入一个视频课程网址(q退出): ') 44 | except KeyboardInterrupt: 45 | print() 46 | if urlstr == 'q': 47 | break 48 | mooc = match_mooc(urlstr) 49 | if not mooc: 50 | input("视频课程链接不合法,请回车继续...") 51 | continue 52 | if not mooc.set_mode(): 53 | continue 54 | print("正在连接资源......") 55 | try: 56 | mooc.prepare(urlstr) 57 | except RequestFailed: 58 | print("网路请求异常!") 59 | input("请按回车键返回主界面...") 60 | continue 61 | while True: 62 | try: 63 | isdownload = mooc.download() 64 | if isdownload: 65 | print('"{}" 下载完毕!'.format(mooc.title)) 66 | print("下载路径: {}".format(mooc.rootDir)) 67 | os.startfile(mooc.rootDir) 68 | else: 69 | print('"{}" 还未开课!'.format(mooc.title)) 70 | input("请按回车键返回主界面...") 71 | break 72 | except (RequestFailed, DownloadFailed) as err: 73 | if isinstance(err, RequestFailed): 74 | print("网路请求异常!") 75 | else: 76 | print("文件下载异常!") 77 | if inquire(): 78 | continue 79 | else: 80 | break 81 | except KeyboardInterrupt: 82 | print() 83 | if inquire(): 84 | continue 85 | else: 86 | break 87 | except: 88 | print("程序异常退出,希望反馈作者!") 89 | return 90 | except KeyboardInterrupt: 91 | input("程序退出...") 92 | finally: 93 | # if (input("\n小哥哥,小姐姐,打个赏再走呗 …(⊙_⊙)… [y/n]: ") != 'n'): 94 | # os.startfile(alipay_path) 95 | os.system("pause") 96 | 97 | def inquire(): 98 | redown = None 99 | while redown not in ('y','n'): 100 | try: 101 | redown = input("是否继续[y/n]: ") 102 | except (KeyboardInterrupt, EOFError): 103 | print() 104 | return redown=='y' 105 | 106 | def match_mooc(url): 107 | mooc = None 108 | for mooc_name in courses_mooc: 109 | if courses_re.get(mooc_name).match(url): 110 | mooc = courses_mooc.get(mooc_name)() 111 | break 112 | return mooc 113 | 114 | def main(): 115 | mooc_interface() 116 | 117 | if __name__ == '__main__': 118 | main() 119 | -------------------------------------------------------------------------------- /Mooc/Mooc_Base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mooc 的虚基类:用于派生所有Mooc子类 3 | ''' 4 | 5 | import os 6 | from abc import ABC, abstractmethod 7 | from Mooc.Mooc_Config import * 8 | from Mooc.Mooc_Download import * 9 | from Mooc.Mooc_Request import * 10 | 11 | __all__ = [ 12 | "Mooc_Base" 13 | ] 14 | 15 | class Mooc_Base(ABC): 16 | def __init__(self): 17 | self.__mode = None 18 | self.__cid = None 19 | self.__title = None 20 | self.__infos = None 21 | self.__rootDir = None 22 | 23 | @property 24 | def mode(self): 25 | '''下载模式: 用于选择性下载''' 26 | return self.__mode 27 | 28 | @mode.setter 29 | def mode(self, mode): 30 | self.__mode = mode 31 | 32 | @property 33 | def cid(self): 34 | '''课程的 ID''' 35 | return self.__cid 36 | 37 | @cid.setter 38 | def cid(self, cid): 39 | self.__cid = cid 40 | 41 | @property 42 | def title(self): 43 | '''课程的标题''' 44 | return self.__title 45 | 46 | @title.setter 47 | def title(self, title): 48 | self.__title = title 49 | 50 | @property 51 | def infos(self): 52 | '''解析后的课程信息''' 53 | return self.__infos 54 | 55 | @property 56 | def rootDir(self): 57 | return self.__rootDir 58 | 59 | @rootDir.setter 60 | def rootDir(self, rootDir): 61 | self.__rootDir = rootDir 62 | 63 | @infos.setter 64 | def infos(self, infos): 65 | self.__infos = infos 66 | 67 | @abstractmethod 68 | def _get_cid(self): 69 | pass 70 | 71 | @abstractmethod 72 | def _get_title(self): 73 | pass 74 | 75 | @abstractmethod 76 | def _get_infos(self): 77 | pass 78 | 79 | @abstractmethod 80 | def _download(self): 81 | pass 82 | 83 | @abstractmethod 84 | def set_mode(self): 85 | pass 86 | 87 | @abstractmethod 88 | def prepare(self, url): 89 | pass 90 | 91 | @abstractmethod 92 | def download(self): 93 | pass 94 | 95 | @classmethod 96 | def download_video(cls, video_url, video_name, video_dir): 97 | '''下载 MP4 视频文件''' 98 | succeed = True 99 | if not cls.judge_file_existed(video_dir, video_name, '.mp4'): 100 | try: 101 | header = request_head(video_url) 102 | size = float(header['Content-Length']) / (1024*1024) 103 | print(" |-{} [mp4] 大小: {:.2f}M".format(cls.align(video_name,LENGTH), size)) 104 | aria2_download_file(video_url, video_name+'.mp4', video_dir) 105 | except DownloadFailed: 106 | print(" |-{} [mp4] 资源无法下载!".format(cls.align(video_name,LENGTH))) 107 | succeed = False 108 | else: 109 | print(" |-{} [mp4] 已经成功下载!".format(cls.align(video_name,LENGTH))) 110 | return succeed 111 | 112 | @classmethod 113 | def download_pdf(cls, pdf_url, pdf_name, pdf_dir): 114 | '''下载 PDF ''' 115 | succeed = True 116 | if not cls.judge_file_existed(pdf_dir, pdf_name, '.pdf'): 117 | try: 118 | aria2_download_file(pdf_url, pdf_name+'.pdf', pdf_dir) 119 | print(" |-{} (pdf) 已经成功下载!".format(cls.align(pdf_name,LENGTH))) 120 | except DownloadFailed: 121 | print(" |-{} (pdf) 资源无法下载!".format(cls.align(pdf_name,LENGTH))) 122 | succeed = False 123 | else: 124 | print(" |-{} (pdf) 已经成功下载!".format(cls.align(pdf_name,LENGTH))) 125 | return succeed 126 | 127 | @classmethod 128 | def download_sub(cls, sub_url, sub_name, sub_dir): 129 | '''下载字幕''' 130 | succeed = True 131 | if not cls.judge_file_existed(sub_dir, sub_name, '.srt'): 132 | try: 133 | aria2_download_file(sub_url, sub_name+'.srt', sub_dir) 134 | except DownloadFailed: 135 | succeed = False 136 | return succeed 137 | 138 | @staticmethod 139 | def judge_file_existed(dirname, filename, fmt): 140 | ''' 141 | judge_file_existed(dirname, filename, fmt) 142 | 判断在 dirname 目录下是否存在已下载成功的格式为 fmt 且文件名为 filename 的文件 143 | ''' 144 | filepath = os.path.join(dirname, filename) 145 | exist1 = os.path.exists(filepath+fmt) 146 | exist2 = os.path.exists(filepath+fmt+'.aria2') 147 | return exist1 and not exist2 148 | 149 | @staticmethod 150 | def align(string, width): # 对齐汉字字符窜,同时截断多余输出 151 | ''' 152 | align(string, width) 根据width宽度居中对齐字符窜 string,主要用于汉字居中 153 | ''' 154 | res = "" 155 | size = 0 156 | for ch in string: 157 | if (size+3 > width): 158 | break 159 | size += 1 if ord(ch) <= 127 else 2 160 | res += ch 161 | res += (width-size)*' ' 162 | return res 163 | -------------------------------------------------------------------------------- /Mooc/Icourse163/Icourse163_Mooc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | www.icourse163.org 下所有免费课程的下载和解析 3 | ''' 4 | 5 | import os 6 | import re 7 | if __package__ is None: 8 | import sys 9 | sys.path.append('..\\') 10 | sys.path.append("..\\..\\") 11 | from Mooc.Mooc_Config import * 12 | from Mooc.Mooc_Base import * 13 | from Mooc.Mooc_Download import * 14 | from Mooc.Mooc_Request import * 15 | from Mooc.Mooc_Potplayer import * 16 | from Mooc.Icourse163.Icourse163_Config import * 17 | from Mooc.Icourse163.Icourse163_Base import * 18 | 19 | __all__ = [ 20 | "Icourse163_Mooc" 21 | ] 22 | 23 | class Icourse163_Mooc(Icourse163_Base): 24 | course_url = "https://www.icourse163.org/course/" 25 | infos_url = 'https://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr' 26 | parse_url = 'https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr' 27 | infos_data = { 28 | 'callCount':'1', 29 | 'scriptSessionId':'${scriptSessionId}190', 30 | 'c0-scriptName':'CourseBean', 31 | 'c0-methodName':'getMocTermDto', 32 | 'c0-id':'0', 33 | 'c0-param0':None, # 'number:'+self.term_id, 34 | 'c0-param1':'number:0', 35 | 'c0-param2':'boolean:true', 36 | 'batchId':'1543633161622' 37 | } 38 | parse_data = { 39 | 'callCount': '1', 40 | 'scriptSessionId': '${scriptSessionId}190', 41 | 'c0-scriptName':'CourseBean', 42 | 'c0-methodName':'getLessonUnitLearnVo', 43 | 'httpSessionId':'5531d06316b34b9486a6891710115ebc', 44 | 'c0-id': '0', 45 | 'c0-param0':None, #'number:'+meta[0], 46 | 'c0-param1':None, #'number:'+meta[1], 47 | 'c0-param2':'number:0', 48 | 'c0-param3':None, #'number:'+meta[2], 49 | 'batchId': '1543633161622' 50 | } 51 | 52 | def __init__(self, mode=IS_SHD): 53 | super().__init__() 54 | self.mode = mode 55 | 56 | def _get_cid(self, url): 57 | self.cid = None 58 | match = courses_re['icourse163_mooc'].match(url) 59 | if match and match.group(4): 60 | self.cid = match.group(4) 61 | 62 | def _get_title(self): 63 | if self.cid is None: 64 | return 65 | self.title = self.term_id = None 66 | url = self.course_url + self.cid 67 | text = request_get(url) 68 | match = re.search(r'termId : "(\d+)"', text) 69 | if match: 70 | self.term_id = match.group(1) 71 | names = re.findall(r'name:"(.+)"', text) 72 | if names: 73 | title = '__'.join(names) 74 | self.title = winre.sub('', title)[:WIN_LENGTH] # 用于除去win文件非法字符 75 | 76 | def _get_infos(self): 77 | if self.term_id is None: 78 | return 79 | self.infos = {} 80 | self.infos_data['c0-param0'] = 'number:'+self.term_id 81 | text = request_post(self.infos_url, self.infos_data, decoding='unicode_escape') 82 | chapters = re.findall(r'homeworks=\w+;.+?id=(\d+).+?name="((.|\n)+?)";',text) 83 | for i,chapter in enumerate(chapters,1): 84 | chapter_title = winre.sub('', '{'+str(i)+'}--'+chapter[1])[:WIN_LENGTH] 85 | self.infos[chapter_title] = {} 86 | lessons = re.findall(r'chapterId='+chapter[0]+r'.+?contentType=1.+?id=(\d+).+?isTestChecked=false.+?name="((.|\n)+?)".+?test', text) 87 | for j,lesson in enumerate(lessons,1): 88 | lesson_title = winre.sub('', '{'+str(j)+'}--'+lesson[1])[:WIN_LENGTH] 89 | self.infos[chapter_title][lesson_title] = {} 90 | videos = re.findall(r'contentId=(\d+).+contentType=(1).+id=(\d+).+lessonId=' + 91 | lesson[0] + r'.+name="(.+)"', text) 92 | pdfs = re.findall(r'contentId=(\d+).+contentType=(3).+id=(\d+).+lessonId=' + 93 | lesson[0] + r'.+name="(.+)"', text) 94 | video_source = [{'params':video[:3], 'name':winre.sub('','[{}.{}.{}]--{}'.format(i,j,k,video[3])).rstrip('.mp4')[:WIN_LENGTH]} for k,video in enumerate(videos,1)] 95 | pdf_source = [{'params':pdf[:3], 'name':winre.sub('','({}.{}.{})--{}'.format(i,j,k,pdf[3])).rstrip('.pdf')[:WIN_LENGTH]} for k,pdf in enumerate(pdfs,1)] 96 | self.infos[chapter_title][lesson_title]['videos'] = video_source 97 | self.infos[chapter_title][lesson_title]['pdfs'] = pdf_source 98 | 99 | def _get_source_text(self, params): 100 | self.parse_data['c0-param0'] = params[0] 101 | self.parse_data['c0-param1'] = params[1] 102 | self.parse_data['c0-param3'] = params[2] 103 | text = request_post(self.parse_url, self.parse_data, decoding='unicode_escape') 104 | return text 105 | 106 | def _get_pdf_url(self, params): 107 | text = self._get_source_text(params) 108 | pdf_match = re.search(r'textOrigUrl:"(.*?)"', text) 109 | pdf_url = None 110 | if pdf_match: 111 | pdf_url = pdf_match.group(1) 112 | return pdf_url 113 | 114 | def _get_video_url(self, params): 115 | text = self._get_source_text(params) 116 | sub_match = re.search(r'name=".+";.*url="(.*?)"', text) 117 | video_url = sub_url = None 118 | if sub_match: 119 | sub_url = sub_match.group(1) 120 | resolutions = ['Shd', 'Hd', 'Sd'] 121 | for index, sp in enumerate(resolutions,1): 122 | video_match = re.search(r'(?Pmp4)%sUrl="(?P.*?\.(?P=ext).*?)"' % sp, text) 123 | if video_match: 124 | video_url, _ = video_match.group('url', 'ext') 125 | if index >= self.mode: break 126 | return video_url, sub_url 127 | 128 | def _download(self): # 根据课程视频链接来下载高清MP4慕课视频, 成功下载完毕返回 True 129 | print('\n{:^{}s}'.format(self.title, LEN_S)) 130 | self.rootDir = rootDir = os.path.join(PATH, self.title) 131 | courseDir = os.path.join(rootDir, COURSENAME) 132 | if not os.path.exists(courseDir): 133 | os.makedirs(courseDir) 134 | Icourse163_Base.potplayer.init(rootDir) 135 | Icourse163_Base.potplayer.enable() 136 | for i,chapter in enumerate(self.infos,1): # 去除 win 文价夹中的非法字符 137 | print(chapter) 138 | chapterDir = os.path.join(courseDir, chapter) 139 | if not os.path.exists(chapterDir): 140 | os.mkdir(chapterDir) 141 | for j,lesson in enumerate(self.infos[chapter],1): 142 | lessonDir = os.path.join(chapterDir, lesson) 143 | if not os.path.exists(lessonDir): 144 | os.mkdir(lessonDir) 145 | print(" "+lesson) 146 | sources = self.infos[chapter][lesson] 147 | for k,pdf_source in enumerate(sources['pdfs'],1): 148 | params, pdf_name = pdf_source['params'], pdf_source['name'] 149 | pdf_url= self._get_pdf_url(params) 150 | if pdf_url: 151 | self.download_pdf(pdf_url, pdf_name, lessonDir) 152 | if self.mode == ONLY_PDF: 153 | continue 154 | for k,video_source in enumerate(sources['videos'],1): 155 | params, name = video_source['params'], video_source['name'] 156 | video_name = sub_name = name 157 | video_url, sub_url = self._get_video_url(params) 158 | if video_url: 159 | self.download_video(video_url=video_url, video_name=video_name, video_dir=lessonDir) 160 | if sub_url: 161 | self.download_sub(sub_url, sub_name, lessonDir) 162 | 163 | def prepare(self, url): 164 | self._get_cid(url) 165 | self._get_title() 166 | self._get_infos() 167 | 168 | def download(self): 169 | if self.cid and self.title and self.term_id and self.infos: 170 | self._download() 171 | return True 172 | return False 173 | 174 | 175 | def main(): 176 | # url = 'http://www.icourse163.org/course/GDUFS-1002493010' 177 | # url = 'https://www.icourse163.org/course/WHU-1001539003' 178 | url = 'https://www.icourse163.org/course/XHDX-1205600803' 179 | icourse163_mooc = Icourse163_Mooc() 180 | if (icourse163_mooc.set_mode()): 181 | icourse163_mooc.prepare(url) 182 | icourse163_mooc.download() 183 | 184 | if __name__ == '__main__': 185 | main() 186 | -------------------------------------------------------------------------------- /Mooc/Icourses/Icourse_Mooc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | www.icourses.cn/mooc/ 下的资源共享课下载解析 3 | ''' 4 | 5 | import os 6 | import re 7 | import json 8 | if __package__ is None: 9 | import sys 10 | sys.path.append('..\\') 11 | sys.path.append('..\\..\\') 12 | from Mooc.Mooc_Config import * 13 | from Mooc.Mooc_Download import * 14 | from Mooc.Mooc_Request import * 15 | from Mooc.Icourses.Icourse_Config import * 16 | from Mooc.Icourses.Icourse_Base import * 17 | 18 | __all__ = [ 19 | "Icourse_Mooc" 20 | ] 21 | 22 | class Icourse_Mooc(Icourse_Base): 23 | url_title = 'http://www.icourses.cn/sCourse/course_{}.html' 24 | url_id = 'http://www.icourses.cn/web/sword/portal/shareChapter?cid=' 25 | url_course = 'http://www.icourses.cn/web//sword/portal/getRess' 26 | url_assign = 'http://www.icourses.cn/web/sword/portal/assignments?cid=' 27 | url_paper = 'http://www.icourses.cn/web/sword/portal/testPaper?cid=' 28 | url_source = 'http://www.icourses.cn/web/sword/portal/sharerSource?cid=' 29 | 30 | def __init__(self, mode=IS_MP4|IS_PDF|IS_PAPER|IS_SOURCE): 31 | super().__init__() 32 | self.mode = mode 33 | 34 | def _get_cid(self, url): 35 | self.cid = None 36 | match = courses_re.get('icourse_mooc').match(url) 37 | if match: 38 | cid = match.group(3) or match.group(5) 39 | self.cid = cid 40 | 41 | def _get_title(self): 42 | if not self.cid: 43 | return 44 | self.title = None 45 | url = self.url_title.format(self.cid) 46 | text = request_get(url) 47 | match_name = re.search(r'
\s*

(.*?)

', text) 48 | match_school = re.search(r'学校:\s*

(.*?)

', text) 49 | if match_name and match_school: 50 | title_name = match_name.group(1) + '__' + match_school.group(1) 51 | self.title = winre.sub('', title_name)[:WIN_LENGTH] 52 | 53 | def _get_infos(self): 54 | if not self.cid: 55 | return 56 | self.infos = [] 57 | url1 = self.url_id + self.cid 58 | url2 = self.url_assign + self.cid 59 | text1 = request_get(url1) 60 | text2 = request_get(url2) 61 | chapter_ids = re.findall(r'
  • ', text1) 62 | chapter_names = re.findall(r'([\s\S]*?)', text1) 63 | chapter_ptext = re.findall(r'', text2) 64 | match_str = r'' 65 | re_pdf = re.compile(r'data-class="media"[\s\S]*?data-title="([\s\S]*?)"[\s\S]*?data-url="(.*?)"') 66 | for _id, name in zip(chapter_ids, chapter_names): 67 | self.infos.append({'id': _id, 'name': winre.sub('',name)[:WIN_LENGTH], 'units':[], 'pdfs':[]}) 68 | for index, ptext in chapter_ptext: 69 | inx = int(index)-1 70 | pdfs = re_pdf.findall(ptext) 71 | pdf_list = [{'name':winre.sub('', pdf[0])[:WIN_LENGTH], 'url':pdf[1]} for pdf in pdfs] 72 | self.infos[inx]['pdfs'] = pdf_list 73 | unit_list = re.findall(r'(\d+).\s*?(\d+)(.*?)', text1) 74 | for unit_id,unit_inx1, unit_inx2,unit_name in unit_list: 75 | inx1 = int(unit_inx1)-1 76 | inx2 = int(unit_inx2)-1 77 | self.infos[inx1]['units'].append({'id': unit_id, 'name': winre.sub('',unit_name)[:WIN_LENGTH], 'pdfs':[]}) 78 | m_str = match_str.format(unit_inx1, unit_inx2) 79 | match_ptext = re.search(m_str, text2) 80 | if match_ptext: 81 | ptext = match_ptext.group(1) 82 | pdfs = re_pdf.findall(ptext) 83 | pdf_list = [{'name':winre.sub('', pdf[0])[:WIN_LENGTH], 'url':pdf[1]} for pdf in pdfs] 84 | self.infos[inx1]['units'][inx2]['pdfs'] = pdf_list 85 | 86 | def _get_course_links(self, sid): 87 | mp4_list = [] 88 | pdf_list = [] 89 | data = {'sectionId': sid} 90 | text = request_post(self.url_course, data) 91 | #!!! except json.decoder.JSONDecodeError 92 | infos = json.loads(text) 93 | if infos['model']['listRes'] : 94 | reslist = infos['model']['listRes'] 95 | for res in reslist: 96 | if res['mediaType'] == 'mp4': 97 | if 'fullResUrl' in res: 98 | mp4_list.append((res['fullResUrl'], res['title'])) 99 | elif res['mediaType'] in ('ppt', 'pdf'): 100 | if 'fullResUrl' in res: 101 | pdf_list.append((res['fullResUrl'], res['title'])) 102 | return mp4_list, pdf_list 103 | 104 | def _get_paper_links(self): 105 | url = self.url_paper + self.cid 106 | paper_list = [] 107 | text = request_get(url) 108 | match_text = re.findall(r'', text) 109 | re_url = re.compile(r'data-url="(.*?)"') 110 | re_title = re.compile(r'data-title="(.*?)"') 111 | for m_text in match_text: 112 | link_list = re_url.findall(m_text[0]) 113 | title_list = re_title.findall(m_text[0]) 114 | paper_list += list(zip(link_list, title_list)) 115 | return paper_list 116 | 117 | def _get_source_links(self): 118 | url = self.url_source + self.cid 119 | source_list = [] 120 | text = request_get(url) 121 | match_text = re.findall(r'', text) 122 | re_url = re.compile(r'data-url="(.*?)"') 123 | re_title = re.compile(r'data-title="(.*?)"') 124 | for m_text in match_text: 125 | link_list = re_url.findall(m_text[0]) 126 | title_list = re_title.findall(m_text[0]) 127 | source_list += list(zip(link_list, title_list)) 128 | return source_list 129 | 130 | def _download(self): 131 | print('\n{:^{}s}'.format(self.title, LEN_S)) 132 | self.rootDir = rootDir = os.path.join(PATH, self.title) 133 | if not os.path.exists(rootDir): 134 | os.mkdir(rootDir) 135 | Icourse_Base.potplayer.init(rootDir) 136 | if (self.mode & IS_MP4) or (self.mode & IS_PDF): 137 | courseDir = os.path.join(rootDir, COURSENAME) 138 | if not os.path.exists(courseDir): 139 | os.mkdir(courseDir) 140 | print('-'*LEN_+'下载课程'+'-'*LEN_) 141 | Icourse_Base.potplayer.enable() 142 | for cnt1, info in enumerate(self.infos, 1): 143 | chapter = '{'+str(cnt1)+'}--'+info['name'] 144 | print(chapter) 145 | chapterDir = os.path.join(courseDir, chapter) 146 | if not os.path.exists(chapterDir): 147 | os.mkdir(chapterDir) 148 | mp4_list, pdf_list = self._get_course_links(info['id']) 149 | pdf_list += [(pdf['url'], pdf['name']) for pdf in info['pdfs']] 150 | if self.mode & IS_PDF: 151 | self.download_pdf_list(chapterDir, pdf_list, '{}.'.format(cnt1)) 152 | if self.mode & IS_MP4: 153 | self.download_video_list(chapterDir, mp4_list, '{}.'.format(cnt1)) 154 | for cnt2, unit in enumerate(info['units'],1): 155 | lesson = '{'+str(cnt2)+'}--'+unit['name'] 156 | print(" "+lesson) 157 | lessonDir = os.path.join(chapterDir, lesson) 158 | if not os.path.exists(lessonDir): 159 | os.mkdir(lessonDir) 160 | mp4_list, pdf_list = self._get_course_links(unit['id']) 161 | pdf_list += [(pdf['url'], pdf['name']) for pdf in unit['pdfs']] 162 | if self.mode & IS_PDF: 163 | self.download_pdf_list(lessonDir, pdf_list, '{}.{}.'.format(cnt1,cnt2)) 164 | if self.mode & IS_MP4: 165 | self.download_video_list(lessonDir, mp4_list, '{}.{}.'.format(cnt1,cnt2)) 166 | if self.mode & IS_PAPER: 167 | paperDir = os.path.join(rootDir, PAPERNAME) 168 | if not os.path.exists(paperDir): 169 | os.mkdir(paperDir) 170 | print("-"*LEN_+"下载试卷"+"-"*LEN_) 171 | paper_list = self._get_paper_links() 172 | self.download_pdf_list(paperDir, paper_list) 173 | if self.mode & IS_SOURCE: 174 | sourceDir = os.path.join(rootDir, SOURCENAME) 175 | if not os.path.exists(sourceDir): 176 | os.mkdir(sourceDir) 177 | print("-"*LEN_+"下载资源"+"-"*LEN_) 178 | Icourse_Base.potplayer.disable() 179 | source_list = self._get_source_links() 180 | pdf_list = list(filter(lambda x:x[0].endswith('.pdf'), source_list)) 181 | mp4_list = list(filter(lambda x:x[0].endswith('.mp4'), source_list)) 182 | self.download_pdf_list(sourceDir, pdf_list) 183 | self.download_video_list(sourceDir, mp4_list) 184 | 185 | def set_mode(self): 186 | while True: 187 | try: 188 | instr = input( 189 | " 视频:[1] + 课件:[2] + 试卷:[4] + 资源:[8]\n" 190 | "请输入一个0-15的数选择性下载内容(如15表示全部下载,15=1+2+4+8) [0退出]: " 191 | ) 192 | if not instr: 193 | continue 194 | try: 195 | innum = int(instr) 196 | if innum == 0: 197 | return False 198 | elif 1 <= innum <= 15: 199 | self.mode = innum 200 | return True 201 | else: 202 | print("请输入一个0-15之间的整数!") 203 | continue 204 | except ValueError: 205 | print("请输入一个0-15之间的整数!") 206 | except KeyboardInterrupt: 207 | print() 208 | 209 | 210 | def main(): 211 | # url = 'http://www.icourses.cn/sCourse/course_4860.html' 212 | url = 'http://www.icourses.cn/web/sword/portal/shareDetails?cId=4860#/course/chapter' 213 | # url = 'https://www.icourses.cn/sCourse/course_6661.html' 214 | # url = 'http://www.icourses.cn/sCourse/course_3459.html' 215 | icourse_mooc = Icourse_Mooc() 216 | if (icourse_mooc.set_mode()): 217 | icourse_mooc.prepare(url) 218 | icourse_mooc.download() 219 | 220 | 221 | if __name__ == '__main__': 222 | main() 223 | --------------------------------------------------------------------------------