├── .gitnore
├── private.txt
├── license.txt
├── MyOCR.py
├── readme.md
├── LoginUCAS.py
└── main.py


/.gitnore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.pyc
3 | *~
4 | *.swp


--------------------------------------------------------------------------------
/private.txt:
--------------------------------------------------------------------------------
1 | username
2 | password
3 | E:\OneDrive\文档\UCAS
4 | 16-17春季


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 hrwhisper
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MyOCR.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Date    : 2017/2/1
 3 | # @Author  : hrwhisper
 4 | 
 5 | import os
 6 | from sys import exit
 7 | import re
 8 | import subprocess
 9 | from PIL import Image
10 | 
# Sink for tesseract's stderr output.  subprocess.DEVNULL is used instead of
# an open() handle so no file object stays open for the life of the process.
devnull = subprocess.DEVNULL
# Number of pixels trimmed from every edge of the captcha image before OCR.
cut_size = 1
13 | 
14 | 
def pre_process(func):
    """Decorate an OCR function so it receives a cleaned-up image file.

    The returned wrapper takes an image path and:

    1. binarises the image (pixel values above 127 become 255, others 0)
       and converts it to 1-bit mode;
    2. crops ``cut_size`` pixels off every edge;
    3. overwrites the file at the same path with the result;
    4. calls ``func`` with that path and returns its result.

    The image file is always deleted afterwards.  If ``func`` raises
    ``FileNotFoundError`` a hint to check the tesseract installation is
    printed and the process exits with status 1.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def _wrapper(filename):
        image = Image.open(filename).point(lambda p: 255 if p > 127 else 0).convert("1")
        w, h = image.size
        image = image.crop((cut_size, cut_size, w - cut_size, h - cut_size))
        # The processed image deliberately replaces the original file.
        image.save(filename)
        try:
            return func(filename)
        except FileNotFoundError:
            print('请检查是否安装tesseract-OCR')
            os.system("pause")
            exit(1)
        finally:
            # Runs on success, on the exit above and on any other error
            # (e.g. CalledProcessError), so the image file never lingers.
            os.remove(filename)

    return _wrapper
33 | 
34 | 
@pre_process
def image_to_string(img):
    """Run tesseract on the image at ``img`` and return the recognised text.

    The OCR output is returned lower-cased with every non-word character
    (whitespace, punctuation, ...) removed; an empty string is returned when
    tesseract printed nothing.

    :raises FileNotFoundError: if the ``tesseract`` executable cannot be found.
    :raises subprocess.CalledProcessError: if tesseract exits with an error.
    """
    # Pass argv as a list.  A single command string without shell=True only
    # works on Windows; on POSIX the whole string is taken as the program
    # name and FileNotFoundError is raised even when tesseract is installed.
    res = subprocess.check_output(['tesseract', img, 'stdout'], stderr=devnull).decode()
    return re.sub(r'\W', '', res).lower()
39 | 
40 | 
if __name__ == '__main__':
    # Manual smoke test: OCR two sample captcha images and print the results.
    for sample in ('ucas_code1.jpg', 'ucas_code2.jpg'):
        print(image_to_string(sample))
44 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # UCAS 课件自动下载
 2 | 
 3 | ## 使用方法
 4 | 
 5 | 两种使用方法。
 6 | 
 7 | ### 小白用户
 8 | 
 9 | 该方法直接运行exe文件，exe文件可以在 https://github.com/youqingxiaozhua/Ucas_course_ppt_auto_download/releases 中下载
10 | 
11 | 修改**private.txt**文件，然后双击运行main.exe即可
12 | 
13 | ps: 
14 | 
15 | - private.txt与main.exe在**同一目录**下即可
16 | 
17 | 
18 | 
19 | ### 高级用户
20 | 
 21 | 修改根目录下的 private.txt 文件，然后运行 `python main.py` 即可。
22 | 
23 | 需要全部的环境（包括python），见下方环境要求，以及参考对应的安装方法
24 | 
25 | > 可以设置alias实现快速调用，或者添加计划任务每天自动同步
26 | 
27 | 
28 | ### private文件说明
29 | 
30 | private中，各行表示意义如下：
31 | 
32 | 1. 第一行为登录选课系统的账号
33 | 2. 第二行为密码
34 | 3. 第三行为要保存的路径
35 | 4. 第四行为当前的学期，如16-17春季（没有则全部下载）
36 | 
37 | 
38 | 
 39 | ## 环境要求
40 | 
41 | - python 3.5.2
42 | - requests 2.11
43 | - BeautifulSoup
44 | - 可选环境：
45 |   - PIL
46 |   - Tesseract-OCR
47 | 
48 | ### 安装方法
49 | - pip install beautifulsoup4
50 | - pip install requests
51 | - pip install Pillow
 52 | - 登录网址默认为 http://onestop.ucas.ac.cn/home/index ，如果这个网站挂了，将改用 sep.ucas.ac.cn 登录；当你在校外时，还需要再安装如下环境以支持验证码识别：
53 |   - Tesseract-OCR
54 |     - windows下安装：http://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-setup-3.05.00dev.exe
55 |       - 安装时候勾选Registry settings
56 |     - Linux  \  MAC OS X安装见 https://github.com/tesseract-ocr/tesseract/wiki
57 | 
58 | 
59 | 
60 | 
61 | ## 其它
62 | 
63 | - 暂时没有android / IOS的计划。
 64 | - 建议与云盘（如OneDrive）搭配使用，这样在电脑上下载到OneDrive文件夹中，手机上也可以收到。
65 | - **觉得好用点个star吧~**
66 | 
67 | ## 更新说明
68 | - 2020-2-14 新增升级提示
69 | - 2020-2-14 适配课程网站升级为HTTPS
70 | - 更新适配到2019年秋季
 71 | - 新增登录网址，不用验证码
72 | - 修复因为微软CMD下编码不一致导致程序crash
73 | - 支持最新验证码登录（校内校外不一致）
74 |   - 校内不需要验证码，校外需要
75 | - 多线程下载
76 | - 自定义当前学期，只下载当前学期的课程PPT
77 | - 修复文件夹判断问题（有的老师课件命名没有'.'）
78 | - 添加EXE执行程序（使用 PyInstaller 打包）
79 | - 修复课件名称含有空格导致解析失败问题
80 | - 修复课件里文件夹没有遍历下载的问题
81 | - 修复部分课程给出链接后下载失效(如计算机算法设计与分析,老师给出两个链接)
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/LoginUCAS.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # @Date    : 2017/2/1
  3 | # @Author  : hrwhisper
  4 | import codecs
  5 | import json
  6 | import os
  7 | import time
  8 | from sys import exit
  9 | import requests
 10 | from MyOCR import image_to_string
 11 | 
 12 | 
 13 | class UserNameOrPasswordError(Exception):
 14 |     pass
 15 | 
 16 | 
 17 | class LoginUCAS(object):
 18 |     def __init__(self, use_onestop=True, vercode_save_name='certCode.jpg'):
 19 |         self.username, self.password = LoginUCAS._read_username_and_password()
 20 |         self.cnt = 0
 21 |         self.__BEAUTIFULSOUPPARSE = 'html5lib'  # or use 'lxml'
 22 |         self.session = requests.session()
 23 |         self.vercode_save_name = vercode_save_name
 24 |         self.use_onestop = use_onestop
 25 |         self._init_login_url()
 26 | 
 27 |     def _init_login_url(self):
 28 |         if self.use_onestop:
 29 |             self._onestop_init()
 30 |         else:
 31 |             self._sep_init()
 32 | 
 33 |     def _onestop_init(self):
 34 |         self.url = {
 35 |             'base_url': 'http://onestop.ucas.ac.cn/home/index',
 36 |             'verification_code': None,
 37 |             'login_url': 'http://onestop.ucas.ac.cn/Ajax/Login/0'
 38 |         }
 39 |         # self.session.get(self.url['base_url'])
 40 |         self.headers = {
 41 |             'Host': 'onestop.ucas.ac.cn',
 42 |             "Connection": "keep-alive",
 43 |             'Referer': 'http://onestop.ucas.ac.cn/home/index',
 44 |             'X-Requested-With': 'XMLHttpRequest',
 45 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
 46 |         }
 47 |         self.post_data = {
 48 |             "username": self.username,
 49 |             "password": self.password,
 50 |             "remember": 'checked',
 51 |         }
 52 | 
 53 |     def _sep_init(self):
 54 |         self.url = {
 55 |             'base_url': 'http://sep.ucas.ac.cn/',
 56 |             'verification_code': 'http://sep.ucas.ac.cn/changePic',
 57 |             'login_url': "http://sep.ucas.ac.cn/slogin"
 58 |         }
 59 |         self.headers = {
 60 |             "Host": "sep.ucas.ac.cn",
 61 |             "Connection": "keep-alive",
 62 |             "Upgrade-Insecure-Requests": "1",
 63 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
 64 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
 65 |             "Accept-Encoding": "gzip, deflate, sdch",
 66 |             "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
 67 |         }
 68 |         self.post_data = {
 69 |             "userName": self.username,
 70 |             "pwd": self.password,
 71 |             "sb": "sb",
 72 |             "rememberMe": 1,
 73 |         }
 74 | 
 75 |     @classmethod
 76 |     def _read_username_and_password(cls):
 77 |         with codecs.open('./private.txt', "r", "utf-8") as f:
 78 |             username = password = None
 79 |             for i, line in enumerate(f):
 80 |                 if i == 0:
 81 |                     line = bytes(line.encode('utf-8'))
 82 |                     if line[:3] == codecs.BOM_UTF8:
 83 |                         line = line[3:]
 84 |                     username = line.decode('utf-8').strip()
 85 |                 elif i == 1:
 86 |                     password = line.strip()
 87 |                 else:
 88 |                     break
 89 |         return username, password
 90 | 
 91 |     def _download_verification_code(self):
 92 |         r = self.session.get(self.url['verification_code'], stream=True, headers=self.headers)
 93 |         with open(self.vercode_save_name, 'wb') as f:
 94 |             for chunk in r.iter_content(chunk_size=1024):
 95 |                 if chunk:  # filter out keep-alive new chunks
 96 |                     f.write(chunk)
 97 |                     f.flush()
 98 |         return self.vercode_save_name
 99 | 
100 |     def _need_verification_code(self):
101 |         r = self.session.get(self.url['base_url'])
102 |         return r.text.find('验证码') != -1
103 | 
104 |     def login_sep(self):
105 |         try:
106 |             if not self.cnt:
107 |                 print('Login....' + self.url['base_url'])
108 |             if self.use_onestop:
109 |                 html = self.session.post(
110 |                     self.url['login_url'], data=self.post_data, headers=self.headers).text
111 |                 res = json.loads(html)
112 |                 if not res['f']:
113 |                     raise UserNameOrPasswordError
114 |                 else:
115 |                     html = self.session.get(res['msg']).text
116 |                     print("登录成功 {}".format(self.cnt))
117 |             else:
118 |                 # 登录sep
119 |                 try:
120 |                     if self._need_verification_code():
121 |                         cert_code = image_to_string(self._download_verification_code())
122 |                         while not cert_code or len(cert_code) < 4:
123 |                             cert_code = image_to_string(self._download_verification_code())
124 |                             self.post_data["certCode"] = cert_code
125 |                     html = self.session.post(self.url['login_url'], data=self.post_data, headers=self.headers).text
126 |                     if html.find('密码错误') != -1:
127 |                         raise UserNameOrPasswordError
128 |                     elif html.find('验证码错误') != -1:
129 |                         time.sleep(2)
130 |                         self.cnt += 1
131 |                         return self.login_sep()
132 |                     print("登录成功 {}".format(self.cnt))
133 |                 except requests.exceptions.ConnectionError:
134 |                     print('请检查网络连接')
135 |                     exit(1)
136 |         except UserNameOrPasswordError:
137 |             print('用户名或者密码错误，请检查private文件')
138 |             os.system("pause")
139 |             exit(1)
140 |         except requests.exceptions.ConnectionError:
141 |             self.use_onestop = not self.use_onestop
142 |             self._init_login_url()
143 |             print("login time out, change to " + self.url['base_url'])
144 |             self.cnt += 1
145 |             if self.cnt > 20:
146 |                 print("估计是教务处挂了")
147 |                 exit(1)
148 |             return self.login_sep()
149 |         return self
150 | 
151 | 
if __name__ == '__main__':
    # Manual check: attempt a login starting with the onestop entry point.
    LoginUCAS(use_onestop=True).login_sep()
154 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # @Date    : 2016/9/9
  3 | # @Author  : hrwhisper
  4 | import codecs
  5 | import json
  6 | import re
  7 | import os
  8 | import multiprocessing
  9 | from multiprocessing.dummy import Pool
 10 | from datetime import datetime
 11 | import urllib.parse
 12 | import requests
 13 | from bs4 import BeautifulSoup
 14 | from LoginUCAS import LoginUCAS
 15 | 
 16 | 
 17 | class UCASCourse(object):
 18 |     def __init__(self, time_out=5, check_version=True):
 19 |         self.__BEAUTIFULSOUPPARSE = 'html.parser'  # or use 'lxml'
 20 |         self.semester = None
 21 |         self.save_base_path, self.semester = UCASCourse._read_info_from_file()
 22 |         self.session = None
 23 |         self.headers = None
 24 |         self._init_session()
 25 |         self.course_list = []
 26 |         self.to_download = []
 27 |         self.lock = multiprocessing.Lock()
 28 |         self._time_out = time_out
 29 |         self.version = '1.4'
 30 |         self.check_version = check_version
 31 | 
 32 |     def _check_version(self):
 33 |         r = requests.get('https://api.github.com/repos/youqingxiaozhua/Ucas_course_ppt_auto_download/releases/latest')
 34 |         github_latest = json.loads(r.text)
 35 |         version = github_latest['name']
 36 |         version_note = github_latest['body']
 37 |         if version != self.version:
 38 |             print('\nA new version (v%s: %s) have been released, please download from this link:' % (version, version_note))
 39 |             print('https://github.com/youqingxiaozhua/Ucas_course_ppt_auto_download/releases\n')
 40 | 
 41 |     def _init_session(self):
 42 |         t = LoginUCAS().login_sep()
 43 |         self.session = t.session
 44 |         self.headers = t.headers
 45 | 
 46 |     @classmethod
 47 |     def _read_info_from_file(cls):
 48 |         with codecs.open('./private.txt', "r", "utf-8") as f:
 49 |             save_base_path = semester = None
 50 |             for i, line in enumerate(f):
 51 |                 if i < 2: continue
 52 |                 if i == 2:
 53 |                     save_base_path = line.strip()
 54 |                 if i == 3:
 55 |                     semester = line.strip()
 56 |         return save_base_path, semester
 57 | 
 58 |     def _get_course_page(self):
 59 |         # 从sep中获取Identity Key来登录课程系统，并获取课程信息
 60 |         url = "http://sep.ucas.ac.cn/portal/site/16/801"
 61 |         r = self.session.get(url, headers=self.headers)
 62 |         url = re.findall(r'<meta http-equiv="refresh" content="0;url=([^"]*)">', r.text)[0]
 63 | 
 64 |         self.headers['Host'] = "course.ucas.ac.cn"
 65 |         html = self.session.get(url, headers=self.headers).text
 66 |         return html
 67 | 
 68 |     def _parse_course_list(self):
 69 |         # 获取课程的所有URL
 70 |         html = self._get_course_page()
 71 |         self.course_list = ['https://course.ucas.ac.cn/portal/site/' + x for x in
 72 |                             re.findall(r'https://course.ucas.ac.cn/portal/site/([\d]+)"', html)]
 73 | 
 74 |     def _get_all_resource_url(self):
 75 |         # 从课程的所有URL中获取对应的所有课件
 76 |         print('读取课件中......')
 77 |         base_url = 'https://course.ucas.ac.cn/access/content/group/'
 78 |         urls = [base_url + x.split('/')[-1] + '/' for x in self.course_list]
 79 |         list(map(self._get_resource_url, urls))
 80 | 
 81 |     def _get_resource_url(self, base_url, _path='', source_name=None):
 82 |         html = self.session.get(base_url, headers=self.headers).text
 83 |         tds = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find_all('li')
 84 |         if not source_name:
 85 |             source_name = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('h3').text
 86 |             if self.semester and source_name.find(self.semester) == -1: return  # download only current semester
 87 |         res = set()
 88 |         for td in tds:
 89 |             url = td.find('a')
 90 |             if not url: continue
 91 |             url = urllib.parse.unquote(url['href'])
 92 |             if url == '../': continue
 93 |             # if 'Folder' in td.text:  # directory
 94 |             if 'folder' in td.attrs['class']:  # directory
 95 |                 # folder_name = td.text
 96 |                 self._get_resource_url(base_url + url, _path + '/' + url, source_name)
 97 |             if url.startswith('http:__'):  # Fix can't download when given a web link. eg: 计算机算法分析与设计
 98 |                 try:
 99 |                     res.add((self.session.get(base_url + url, headers=self.headers, timeout=self._time_out).url, _path))
100 |                 except requests.exceptions.ReadTimeout:
101 |                     print("Error-----------: ", base_url + url, "添加进下载路径失败,服务器长时间无响应")
102 |                 except requests.exceptions.ConnectionError as e:
103 |                     print("Error-----------: ", base_url + url, "添加进下载路径失败,服务器长时间无响应")
104 |             else:
105 |                 res.add((base_url + url, _path))
106 | 
107 |         for url, _path in res:
108 |             self.to_download.append((source_name, _path, url))
109 | 
110 |     def _start_download(self):
111 |         # 多线程下载
112 |         p = Pool()
113 |         p.map(self._download_file, self.to_download)
114 |         p.close()
115 |         p.join()
116 | 
117 |     def _download_file(self, param):
118 |         # 下载文件
119 |         dic_name, sub_directory, url = param
120 |         save_path = self.save_base_path + '/' + dic_name + '/' + sub_directory
121 |         with self.lock:
122 |             if not os.path.exists(save_path):  # To create directory
123 |                 os.makedirs(save_path)
124 | 
125 |         filename = url.split('/')[-1]
126 |         save_path += '/' + filename
127 |         if not os.path.exists(save_path):  # To prevent download exists files
128 |             try:
129 |                 r = self.session.get(url, stream=True, timeout=self._time_out)
130 |             except requests.exceptions.ReadTimeout as e:
131 |                 print('Error-----------文件下载失败,服务器长时间无响应: ', save_path)
132 |             except requests.exceptions.ConnectionError as e:
133 |                 print('Error-----------文件下载失败,服务器长时间无响应: ', save_path)
134 | 
135 |             try:
136 |                 # HTML file does not have Content Length attr
137 |                 size_mb = int(r.headers.get('Content-Length')) / (1024 ** 2)
138 |             except TypeError:
139 |                 size_mb = 0.33  # html文件直接指定大小 :)
140 |             try:
141 |                 # print('Start download {dic_name}  >> {sub_directory}{filename}  {size_mb:.2f}MB'.format(**locals()))
142 |                 with open(save_path, 'wb') as f:
143 |                     for chunk in r.iter_content(chunk_size=1024):
144 |                         if chunk:  # filter out keep-alive new chunks
145 |                             f.write(chunk)
146 |                             f.flush()
147 |                 print('{dic_name}  >> {sub_directory}{filename}   Download success'.format(**locals()))
148 |             except UnicodeEncodeError:
149 |                 print('{dic_name}  >> {sub_directory} Download a file'.format(**locals()))
150 | 
151 |     def start(self):
152 |         if self.check_version:
153 |             self._check_version()
154 |         self._parse_course_list()
155 |         self._get_all_resource_url()
156 |         self._start_download()
157 | 
158 | 
if __name__ == '__main__':
    # Work from the script's own directory so ./private.txt is found.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    started_at = datetime.now()
    UCASCourse().start()
    print('Task complete, total time:', datetime.now() - started_at)
    os.system("pause")
167 | 


--------------------------------------------------------------------------------