├── LICENSE
├── README.md
├── mooc.py
└── mooc_cookie.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 fichas
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Down_Mooc
2 | 一个基于 Python3 的超星 mooc 课程内容获取工具,方便离线观看。
3 |
4 | 或者,您也可以不安装 Python,直接选用最新的 releases:https://github.com/fichas/Down_Mooc/releases
5 | ### 安装
6 |
7 | 请安装最新版的Python3,并且使用 `pip` 安装 3 个库:`requests` `BeautifulSoup4` `lxml`
8 |
9 | ```bash
10 | pip install requests BeautifulSoup4 lxml
11 | ```
12 |
13 | 然后[下载最新程序](https://github.com/fichas/Down_Mooc/archive/master.zip)并解压。(也可以使用`git clone https://github.com/fichas/Down_Mooc.git`)
14 |
15 | ### 使用方法如下
16 |
17 | 1. #### mooc.py(无法下载具有权限的课程)
18 |
19 | 找到你课程的主页,一个形如下方链接格式的网址:
20 | https://mooc1-2.chaoxing.com/course/206751495.html
21 | 然后把程序解压,在cmd中运行
22 |
23 | ```bash
24 | python mooc.py https://mooc1-2.chaoxing.com/course/206751495.html
25 | ```
26 |
27 | 程序会自动在此目录下生成两个文件:
28 |
29 | ```c++
30 | output.txt //课程的视频链接,需自行下载(有的视频是有权限的,无法下载)
31 | name.bat //将此文件放在视频存放的位置,会把视频的名称自动修正(有的视频有问题,可能老师的命名方式比较奇怪,先鸽着)
32 | ```
33 |
34 | 2. #### mooc_cookie.py(可以下载已选课程中已经开放的章节,需要获取cookies)
35 |
36 | 在课程中心,打开想要下载的课程,课程链接形式如下:
37 |
38 | https://mooc1-1.chaoxing.com/mycourse/studentcourse?courseId=1234561234&vc=1&clazzid=12345123&enc=1234567123456123456
39 |
40 | 在浏览器中登录后参照[教程链接](https://jingyan.baidu.com/article/0aa2237505193488cd0d647f.html)里的方式可以获取到cookie。
41 |
42 |
43 | 在调用程序获取课程的时候,会自动要求输入 cookies,粘贴便是。
44 |
45 |
46 |
47 | 在解压好的目录下,在cmd中运行
48 |
49 | ```bash
50 | python mooc_cookie.py
51 | ```
52 |
53 | 按照相应的提示输入课程链接和cookies就可以获取到课程内容了。
54 |
55 | 程序会在当前目录下自动生成
56 |
57 | ```c++
58 | output.txt //课程的视频链接,需自行下载(可以使用IDM或迅雷等工具自行下载)
59 | ```
60 |
61 |
62 | ~~*碎碎念:好像把clazzid改成一个加1或减1的链接可以获取到未开放的课程?:eyes:*~~
63 |
64 | ### 特别感谢
65 |
66 | [Foair的Course Crawler](https://github.com/Foair/course-crawler)
67 |
68 | ### 声明
69 |
70 | 仅限个人学习和研究使用,切勿用于其他用途。
71 |
72 | 本程序主体功能只是下载课件和附件,无任何手段获得付费课程,也没有以任何方式向任何人收取费用。
73 |
74 | 如果将程序用于商业用途或其他非法用途,一切后果由用户自负。
75 |
76 | ### 许可协议
77 |
78 | 请遵照 MIT 许可使用该程序。
79 |
--------------------------------------------------------------------------------
/mooc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import re
3 | import sys
4 | import os
5 | import requests
6 |
7 | headers = {'Referer':'http://d0.ananas.chaoxing.com/','User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}
8 |
9 | url=sys.argv[1]
10 | #print(url)
11 |
12 | req = requests.get(url, headers=headers)
13 | strr=req.text
14 |
15 |
16 | patt=re.compile(r'[a-zA-z]+://cs.[^\s][^\_\$]*')
17 | res=patt.findall(strr)
18 |
19 | f=open('output.txt','w')
20 | for i in res :
21 | f.write(i)
22 | f.write('\n')
23 | f.close()
24 | patt=re.compile(r']*>(.*?)+]*>(.*?)')
25 | res=patt.findall(strr)
26 | f=open('name.bat','w')
27 | for i in res :
28 | s1=str(i[0])
29 | s2=str(i[1])
30 | s1=s1.strip()
31 | s2=s2.strip()
32 | stri='ren '+s1+'.mp4 '+s1+'_'+s2+'.mp4'
33 | f.write(stri)
34 | f.write('\n')
35 | f.close()
36 |
37 |
--------------------------------------------------------------------------------
/mooc_cookie.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import requests
4 | import json
5 | import lxml
6 | from time import sleep
7 | import urllib.parse
8 | from bs4 import BeautifulSoup
9 | import sys
10 | import os
11 |
12 |
class ShowProcess():
    """Console progress bar.

    Call show_process() once per finished step (or pass an explicit step
    index); when the counter reaches max_steps the bar closes itself,
    printing the completion message and resetting for reuse.
    """
    i = 0
    max_steps = 0
    max_arrow = 50          # total width of the bar, in characters
    infoDone = 'done'

    def __init__(self, max_steps, infoDone = 'Done'):
        self.max_steps = max_steps
        self.i = 0
        self.infoDone = infoDone

    def show_process(self, i=None):
        """Advance the bar by one step (or jump to step *i*) and redraw."""
        self.i = self.i + 1 if i is None else i
        filled = int(self.i * self.max_arrow / self.max_steps)
        empty = self.max_arrow - filled
        pct = self.i * 100.0 / self.max_steps
        # '\r' keeps the bar redrawing on a single console line.
        bar = '[{}{}]{:.2f}%\r'.format('>' * filled, '-' * empty, pct)
        sys.stdout.write(bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        """Print the completion message and reset the counter."""
        print('\n' + self.infoDone)
        self.i = 0
47 |
48 |
def objectid(strings):
    """Extract attachment object ids from a course-card HTML page.

    The page embeds a JSON blob inside a <script> whose text contains
    ``function()``; the blob is the right-hand side of the first assignment
    after ``try{`` and before ``}catch`` / the first ``;``.

    Returns a list of objectid strings for attachments whose module is
    'insertdoc' or 'insertvideo'.
    """
    soup = BeautifulSoup(strings, 'lxml')
    anss = []
    for script in soup.select('script'):
        ss = str(script)
        if 'function()' not in ss:
            continue
        try:
            # Isolate the JSON literal:  try{ <lhs> = <json>; ... }catch
            blob = ss.split('try{')[1].lstrip()
            blob = blob.split('}catch')[0].split('=', 1)[1].split(';')[0]
            text = json.loads(blob)
            for dic in text['attachments']:
                module = dic['property']['module']
                if module in ('insertdoc', 'insertvideo'):
                    anss.append(dic['property']['objectid'])
        except (IndexError, KeyError, TypeError, ValueError):
            # Malformed script block: skip just this one.  The original bare
            # `except:` reset anss=[], discarding every id collected so far.
            continue
    return anss
70 |
def get_cookie(strr):
    """Parse a raw browser Cookie header ('k=v; k2=v2; ...') into a dict.

    Splits only on the first '=' of each pair, so values containing '='
    (e.g. base64 tokens) survive intact.
    """
    jar = {}
    for chunk in strr.split(';'):
        name, value = chunk.lstrip().split('=', 1)
        jar[name] = value
    return jar
78 |
def obj(strings):
    """Extract object ids from the JSON carried in <iframe data=...> attributes.

    Each relevant iframe stores a JSON object in its ``data`` attribute with
    an ``objectid`` key.  Returns the collected objectid values.
    """
    soup = BeautifulSoup(strings, 'lxml')
    anss = []
    for frame in soup.select('iframe'):
        try:
            text = json.loads(str(frame['data']))
            anss.append(text['objectid'])
        except (KeyError, TypeError, ValueError):
            # iframe without a parseable JSON 'data' attribute -- skip it.
            # (The original bare `except:` body was the no-op `anss = anss`,
            # and a missing attribute raised outside the try and crashed.)
            continue
    return anss
92 |
if __name__ == '__main__':
    # 1. Course identity comes from the query string of the pasted URL.
    print('请输入要抓取的课程链接:')
    url = input('> ')
    query = dict(urllib.parse.parse_qsl(urllib.parse.urlsplit(url).query))
    courseid = str(query['courseId'])
    clazzid = str(query['clazzid'])

    # 2. Authentication: the raw Cookie header copied from the browser.
    print('请输入cookies:')
    cookie_dict = get_cookie(input('> '))

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36'
    }

    # 3. Chapter list page: each <a> has an onclick-style href whose last,
    #    quoted argument is the knowledge (chapter) id.
    list_url = ('https://mooc1-1.chaoxing.com/mycourse/studentstudycourselist'
                '?courseId=' + courseid + '&clazzid=' + clazzid)
    response = requests.get(list_url, headers=headers, cookies=cookie_dict)
    soup = BeautifulSoup(response.text.encode('utf8'), 'lxml')

    knows = []
    for anchor in soup.select('a'):
        href = str(anchor['href'])
        know = href.split('(')[1].split(')')[0].split(',')[-1]
        knows.append(know[1:-1])  # strip the surrounding quote characters

    print('获取到 %d 条任务,正在生成下载链接...' % len(knows))

    # 4. Fetch every chapter card and collect attachment object ids.
    ans = []
    process_bar = ShowProcess(len(knows), '下载链接已保存到output.txt')
    for know in knows:
        process_bar.show_process()
        card_url = ('http://mooc1-1.chaoxing.com/knowledge/cards?clazzid='
                    + clazzid + '&courseid=' + courseid
                    + '&knowledgeid=' + str(know))
        sleep(0.5)  # throttle so the server is not hammered
        response = requests.get(card_url, headers=headers, cookies=cookie_dict)
        ans += obj(response.text.encode('utf8'))

    # 5. Write download links; `with` closes the file even if a write fails
    #    (the original left the handle open on error).
    with open('output.txt', 'w') as f:
        for oid in ans:
            f.write('http://cs.ananas.chaoxing.com/download/' + str(oid) + '\n')
135 |
136 |
137 |
--------------------------------------------------------------------------------