├── LICENSE ├── README.md ├── mooc.py └── mooc_cookie.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 fichas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Down_Mooc 2 | 一个基于 Python3 的超星 mooc 课程内容获取工具,方便离线观看。 3 | 4 | 或者 您也可以不用安装python,选用最新的releases:https://github.com/fichas/Down_Mooc/releases 5 | ### 安装 6 | 7 | 请安装最新版的Python3,并且使用 `pip` 安装 3 个库:`requests` `BeautifulSoup4` `lxml` 8 | 9 | ```python 10 | pip install requests BeautifulSoup4 lxml 11 | ``` 12 | 13 | 然后[下载最新程序](https://github.com/fichas/Down_Mooc/archive/master.zip)并解压。(也可以使用`git clone https://github.com/fichas/Down_Mooc.git`) 14 | 15 | ### 使用方法如下 16 | 17 | 1. #### mooc.py(无法下载具有权限的课程) 18 | 19 | 找到你课程的主页,一个形如下方链接格式的网址: 20 | https://mooc1-2.chaoxing.com/course/206751495.html 21 | 然后把程序解压,在cmd中运行 22 | 23 | ```python 24 | python mooc.py https://mooc1-2.chaoxing.com/course/206751495.html 25 | ``` 26 | 27 | 程序会自动在此目录下生成两个文件: 28 | 29 | ```c++ 30 | output.txt //课程的视频链接,需自行下载(有的视频是有权限的,无法下载) 31 | name.bat //将此文件放在视频存放的位置,会把视频的名称自动修正(有的视频有问题,可能老师的命名方式比较奇怪,先鸽着) 32 | ``` 33 | 34 | 2. #### mooc_cookie.py(可以下载已选课程中已经开放的章节,需要获取cookies) 35 | 36 | 在课程中心,打开想要下载的课程,课程链接形式如下: 37 | 38 | https://mooc1-1.chaoxing.com/mycourse/studentcourse?courseId=1234561234&vc=1&clazzid=12345123&enc=1234567123456123456 39 | 40 | 在浏览器中登录后参照[教程链接](https://jingyan.baidu.com/article/0aa2237505193488cd0d647f.html)里的方式可以获取到cookie。 41 | 42 | 43 | 在调用程序获取课程的时候,会自动要求输入 cookies,粘贴便是。 44 | 45 | 46 | 47 | 在解压好的目录下,在cmd中运行 48 | 49 | ```python 50 | python mooc_cookie.py 51 | ``` 52 | 53 | 按照相应的提示输入课程链接和cookies就可以获取到课程内容了。 54 | 55 | 程序会在当前目录下自动生成 56 | 57 | ```c++ 58 | output.txt //课程的视频链接,需自行下载(可以使用IDM或迅雷等工具自行下载) 59 | ``` 60 | 61 | 62 | ~~*碎碎念:好像把clazzid改成一个加1或减1的链接可以获取到未开放的课程?:eyes*~~ 63 | 64 | ### 特别感谢 65 | 66 | [Foair的Course Crawler](https://github.com/Foair/course-crawler) 67 | 68 | ### 声明 69 | 70 | 仅限个人学习和研究使用,切勿用于其他用途。 71 | 72 | 本程序主体功能只是下载课件和附件,无任何手段获得付费课程,也没有以任何方式向任何人收取费用。 73 | 74 | 如果将程序用于商业用途或其他非法用途,一切后果由用户自负。 75 | 76 | ### 许可协议 77 | 78 | 请遵照 MIT 许可使用该程序。 79 | -------------------------------------------------------------------------------- /mooc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import sys 4 | import os 5 | import requests 6 | 7 | headers = {'Referer':'http://d0.ananas.chaoxing.com/','User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'} 8 | 9 | url=sys.argv[1] 10 | #print(url) 11 | 12 | req = requests.get(url, headers=headers) 13 | strr=req.text 14 | 15 | 16 | patt=re.compile(r'[a-zA-z]+://cs.[^\s][^\_\$]*') 17 | res=patt.findall(strr) 18 | 19 | f=open('output.txt','w') 20 | for i in res : 21 | f.write(i) 22 | f.write('\n') 23 | f.close() 24 | patt=re.compile(r']*>(.*?)+]*>(.*?)') 25 | res=patt.findall(strr) 26 | f=open('name.bat','w') 27 | for i in res : 28 | s1=str(i[0]) 29 | s2=str(i[1]) 30 | s1=s1.strip() 31 | s2=s2.strip() 32 | stri='ren '+s1+'.mp4 '+s1+'_'+s2+'.mp4' 33 | f.write(stri) 34 | f.write('\n') 35 | f.close() 36 | 37 | -------------------------------------------------------------------------------- /mooc_cookie.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import requests 4 | import json 5 | import lxml 6 | from time import sleep 7 | import urllib.parse 8 | from bs4 import BeautifulSoup 9 | import sys 10 | import os 11 | 12 | 13 | class ShowProcess(): 14 | """ 15 | 显示处理进度的类 16 | 调用该类相关函数即可实现处理进度的显示 17 | """ 18 | i = 0 19 | max_steps = 0 20 | max_arrow = 50 21 | infoDone = 'done' 22 | 23 | def __init__(self, max_steps, infoDone = 'Done'): 24 | self.max_steps = max_steps 25 | self.i = 0 26 | self.infoDone = infoDone 27 | 28 | def show_process(self, i=None): 29 | if i is not None: 30 | self.i = i 31 | else: 32 | self.i += 1 33 | num_arrow = int(self.i * self.max_arrow / self.max_steps) 34 | num_line = self.max_arrow - num_arrow 35 | percent = self.i * 100.0 / self.max_steps 36 | process_bar = '[' + '>' * num_arrow + '-' * num_line + ']'\ 37 | + '%.2f' % percent + '%' + '\r' 38 | sys.stdout.write(process_bar) 39 | sys.stdout.flush() 40 | if self.i >= self.max_steps: 41 | self.close() 42 | 43 | def close(self): 44 | print('') 45 | print(self.infoDone) 46 | self.i = 0 47 | 48 | 49 | def objectid(strings): 50 | soup = BeautifulSoup(strings, 'lxml') 51 | anss=[] 52 | for i in list(soup.select('script')): 53 | # print(i) 54 | ss = str(i) 55 | if ss.find('function()') != -1: 56 | ii = ss.split('try{')[1] 57 | ii = ii.lstrip() 58 | ii = ii.split('}catch') 59 | ii = ii[0].split('=', 1)[1] 60 | ii = ii.split(';')[0] 61 | ii = str(ii) 62 | try: 63 | text = json.loads(ii) 64 | for dic in text['attachments']: 65 | if dic['property']['module']=='insertdoc'or dic['property']['module']=='insertvideo': 66 | anss.append(dic['property']['objectid']) 67 | except: 68 | anss=[] 69 | return anss 70 | 71 | def get_cookie(strr): 72 | cookie_dict = {} 73 | s = strr.split(';') 74 | for i in s: 75 | key, value = i.lstrip().split('=', 1) 76 | cookie_dict[key] = value 77 | return cookie_dict 78 | 79 | def obj(strings): 80 | soup = BeautifulSoup(strings, 'lxml') 81 | anss = [] 82 | for i in list(soup.select('iframe')): 83 | # print(i) 84 | ss=str(i['data']) 85 | ii=ss 86 | try: 87 | text = json.loads(ii) 88 | anss.append(text['objectid']) 89 | except: 90 | anss = anss 91 | return anss 92 | 93 | if __name__ == '__main__': 94 | print('请输入要抓取的课程链接:') 95 | url=input('> ') 96 | query = dict(urllib.parse.parse_qsl(urllib.parse.urlsplit(url).query)) 97 | courseid=str(query['courseId']) 98 | clazzid=str(query['clazzid']) 99 | knows=[] 100 | ans=[] 101 | url='https://mooc1-1.chaoxing.com/mycourse/studentstudycourselist?courseId='+courseid+'&clazzid='+clazzid 102 | print('请输入cookies:') 103 | strr = input('> ') 104 | cookie_dict=get_cookie(strr) 105 | 106 | headers = { 107 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36' 108 | } 109 | 110 | response = requests.request("GET", url, headers=headers, cookies=cookie_dict) 111 | soup=BeautifulSoup(response.text.encode('utf8'),'lxml') 112 | 113 | for i in soup.select('a'): 114 | ss=str(i['href']) 115 | know=ss.split('(')[1].split(')')[0].split(',')[-1] 116 | know=know[1:-1] 117 | knows.append(know) 118 | 119 | print('获取到 %d 条任务,正在生成下载链接...'%len(knows)) 120 | 121 | process_bar = ShowProcess(len(knows), '下载链接已保存到output.txt') 122 | for i in knows: 123 | process_bar.show_process() 124 | url='http://mooc1-1.chaoxing.com/knowledge/cards?clazzid='+clazzid+'&courseid='+courseid+'&knowledgeid='+str(i) 125 | sleep(0.5) 126 | response = requests.request("GET", url, headers=headers, cookies=cookie_dict) 127 | tmp=obj(response.text.encode('utf8')) 128 | ans+=tmp 129 | 130 | f = open("output.txt", "w") 131 | for i in ans: 132 | f.write('http://cs.ananas.chaoxing.com/download/'+str(i)+'\n') 133 | f.close() 134 | 135 | 136 | 137 | --------------------------------------------------------------------------------