├── LICENSE
├── README.md
├── mooc.py
└── mooc_cookie.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 fichas
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Down_Mooc
 2 | 一个基于 Python3 的超星 mooc 课程内容获取工具，方便离线观看。
 3 | 
 4 | 或者，您也可以不安装 Python，直接下载最新的 release：https://github.com/fichas/Down_Mooc/releases
 5 | ### 安装
 6 | 
 7 | 请安装最新版的Python3，并且使用 `pip` 安装 3 个库：`requests` `BeautifulSoup4` `lxml`
 8 | 
 9 | ```bash
10 | pip install requests BeautifulSoup4 lxml
11 | ```
12 | 
13 | 然后[下载最新程序](https://github.com/fichas/Down_Mooc/archive/master.zip)并解压。(也可以使用`git clone https://github.com/fichas/Down_Mooc.git`)
14 | 
15 | ### 使用方法如下
16 | 
17 | 1. ####  mooc.py(无法下载具有权限的课程)
18 | 
19 |    找到你课程的主页，一个形如下方链接格式的网址：
20 |    https://mooc1-2.chaoxing.com/course/206751495.html
21 |    然后把程序解压，在cmd中运行  
22 | 
23 |    ```python
24 |     python mooc.py https://mooc1-2.chaoxing.com/course/206751495.html
25 |    ```
26 | 
27 |    程序会自动在此目录下生成两个文件：
28 | 
29 |    ```c++
30 |    output.txt //课程的视频链接，需自行下载(有的视频是有权限的，无法下载)
31 |    name.bat //将此文件放在视频存放的位置，会把视频的名称自动修正(有的视频有问题，可能老师的命名方式比较奇怪，先鸽着)
32 |    ```
33 | 
34 | 2. #### mooc_cookie.py(可以下载已选课程中已经开放的章节，需要获取cookies)
35 | 
36 |    在课程中心，打开想要下载的课程，课程链接形式如下：
37 | 
38 |    https://mooc1-1.chaoxing.com/mycourse/studentcourse?courseId=1234561234&vc=1&clazzid=12345123&enc=1234567123456123456
39 | 
40 |    在浏览器中登录后参照[教程链接](https://jingyan.baidu.com/article/0aa2237505193488cd0d647f.html)里的方式可以获取到cookie。
41 |    
42 | 
43 |    在调用程序获取课程的时候，会自动要求输入 cookies，粘贴便是。
44 | 
45 |    
46 | 
47 |    在解压好的目录下，在cmd中运行
48 | 
49 |    ```python
50 |    python mooc_cookie.py
51 |    ```
52 | 
53 |    按照相应的提示输入课程链接和cookies就可以获取到课程内容了。
54 | 
55 |    程序会在当前目录下自动生成
56 | 
57 |    ```c++
58 |    output.txt //课程的视频链接，需自行下载(可以使用IDM或迅雷等工具自行下载)
59 |    ```
60 |    
61 | 
 62 | ~~*碎碎念：好像把clazzid改成一个加1或减1的链接可以获取到未开放的课程？:eyes:*~~
63 | 
64 | ### 特别感谢
65 | 
66 | [Foair的Course Crawler](https://github.com/Foair/course-crawler)
67 | 
68 | ### 声明
69 | 
70 | 仅限个人学习和研究使用，切勿用于其他用途。
71 | 
72 | 本程序主体功能只是下载课件和附件，无任何手段获得付费课程，也没有以任何方式向任何人收取费用。
73 | 
74 | 如果将程序用于商业用途或其他非法用途，一切后果由用户自负。
75 | 
76 | ### 许可协议
77 | 
78 | 请遵照 MIT 许可使用该程序。
79 | 


--------------------------------------------------------------------------------
/mooc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import re
 3 | import sys
 4 | import os
 5 | import requests
 6 | 
 7 | headers = {'Referer':'http://d0.ananas.chaoxing.com/','User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}  
 8 | 
 9 | url=sys.argv[1]
10 | #print(url)
11 | 
12 | req = requests.get(url, headers=headers)
13 | strr=req.text
14 | 
15 | 
16 | patt=re.compile(r'[a-zA-z]+://cs.[^\s][^\_\$]*')
17 | res=patt.findall(strr)
18 | 
19 | f=open('output.txt','w')
20 | for i in res :
21 |     f.write(i)
22 |     f.write('\n')
23 | f.close()
24 | patt=re.compile(r'<i[^>]*>(.*?)</i>+<a[^>]*>(.*?)</a>')
25 | res=patt.findall(strr)
26 | f=open('name.bat','w')
27 | for i in res :
28 |     s1=str(i[0])
29 |     s2=str(i[1])
30 |     s1=s1.strip()
31 |     s2=s2.strip()
32 |     stri='ren '+s1+'.mp4 '+s1+'_'+s2+'.mp4'
33 |     f.write(stri)
34 |     f.write('\n')
35 | f.close()
36 | 
37 | 


--------------------------------------------------------------------------------
/mooc_cookie.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import requests
  4 | import json
  5 | import lxml
  6 | from time import sleep
  7 | import urllib.parse
  8 | from bs4 import BeautifulSoup
  9 | import sys
 10 | import os
 11 | 
 12 | 
 13 | class ShowProcess():
 14 |     """
 15 |     显示处理进度的类
 16 |     调用该类相关函数即可实现处理进度的显示
 17 |     """
 18 |     i = 0
 19 |     max_steps = 0
 20 |     max_arrow = 50
 21 |     infoDone = 'done'
 22 | 
 23 |     def __init__(self, max_steps, infoDone = 'Done'):
 24 |         self.max_steps = max_steps
 25 |         self.i = 0
 26 |         self.infoDone = infoDone
 27 | 
 28 |     def show_process(self, i=None):
 29 |         if i is not None:
 30 |             self.i = i
 31 |         else:
 32 |             self.i += 1
 33 |         num_arrow = int(self.i * self.max_arrow / self.max_steps)
 34 |         num_line = self.max_arrow - num_arrow
 35 |         percent = self.i * 100.0 / self.max_steps
 36 |         process_bar = '[' + '>' * num_arrow + '-' * num_line + ']'\
 37 |                       + '%.2f' % percent + '%' + '\r'
 38 |         sys.stdout.write(process_bar)
 39 |         sys.stdout.flush()
 40 |         if self.i >= self.max_steps:
 41 |             self.close()
 42 | 
 43 |     def close(self):
 44 |         print('')
 45 |         print(self.infoDone)
 46 |         self.i = 0
 47 | 
 48 | 
 49 | def objectid(strings):
 50 |     soup = BeautifulSoup(strings, 'lxml')
 51 |     anss=[]
 52 |     for i in list(soup.select('script')):
 53 |         #    print(i)
 54 |         ss = str(i)
 55 |         if ss.find('function()') != -1:
 56 |             ii = ss.split('try{')[1]
 57 |             ii = ii.lstrip()
 58 |             ii = ii.split('}catch')
 59 |             ii = ii[0].split('=', 1)[1]
 60 |             ii = ii.split(';')[0]
 61 |             ii = str(ii)
 62 |             try:
 63 |                 text = json.loads(ii)
 64 |                 for dic in text['attachments']:
 65 |                     if dic['property']['module']=='insertdoc'or dic['property']['module']=='insertvideo':
 66 |                         anss.append(dic['property']['objectid'])
 67 |             except:
 68 |                 anss=[]
 69 |     return anss
 70 | 
 71 | def get_cookie(strr):
 72 |     cookie_dict = {}
 73 |     s = strr.split(';')
 74 |     for i in s:
 75 |         key, value = i.lstrip().split('=', 1)
 76 |         cookie_dict[key] = value
 77 |     return cookie_dict
 78 | 
 79 | def obj(strings):
 80 |     soup = BeautifulSoup(strings, 'lxml')
 81 |     anss = []
 82 |     for i in list(soup.select('iframe')):
 83 |         #    print(i)
 84 |         ss=str(i['data'])
 85 |         ii=ss
 86 |         try:
 87 |             text = json.loads(ii)
 88 |             anss.append(text['objectid'])
 89 |         except:
 90 |             anss = anss
 91 |     return anss
 92 | 
 93 | if __name__ == '__main__':
 94 |     print('请输入要抓取的课程链接:')
 95 |     url=input('> ')
 96 |     query = dict(urllib.parse.parse_qsl(urllib.parse.urlsplit(url).query))
 97 |     courseid=str(query['courseId'])
 98 |     clazzid=str(query['clazzid'])
 99 |     knows=[]
100 |     ans=[]
101 |     url='https://mooc1-1.chaoxing.com/mycourse/studentstudycourselist?courseId='+courseid+'&clazzid='+clazzid
102 |     print('请输入cookies:')
103 |     strr = input('> ')
104 |     cookie_dict=get_cookie(strr)
105 | 
106 |     headers = {
107 |         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36'
108 |     }
109 | 
110 |     response = requests.request("GET", url, headers=headers, cookies=cookie_dict)
111 |     soup=BeautifulSoup(response.text.encode('utf8'),'lxml')
112 | 
113 |     for i in soup.select('a'):
114 |         ss=str(i['href'])
115 |         know=ss.split('(')[1].split(')')[0].split(',')[-1]
116 |         know=know[1:-1]
117 |         knows.append(know)
118 | 
119 |     print('获取到 %d 条任务,正在生成下载链接...'%len(knows))
120 | 
121 |     process_bar = ShowProcess(len(knows), '下载链接已保存到output.txt')
122 |     for i in knows:
123 |         process_bar.show_process()
124 |         url='http://mooc1-1.chaoxing.com/knowledge/cards?clazzid='+clazzid+'&courseid='+courseid+'&knowledgeid='+str(i)
125 |         sleep(0.5)
126 |         response = requests.request("GET", url, headers=headers, cookies=cookie_dict)
127 |         tmp=obj(response.text.encode('utf8'))
128 |         ans+=tmp
129 | 
130 |     f = open("output.txt", "w")
131 |     for i in ans:
132 |         f.write('http://cs.ananas.chaoxing.com/download/'+str(i)+'\n')
133 |     f.close()
134 | 
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------