├── .gitnore ├── private.txt ├── license.txt ├── MyOCR.py ├── readme.md ├── LoginUCAS.py └── main.py /.gitnore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | *~ 4 | *.swp -------------------------------------------------------------------------------- /private.txt: -------------------------------------------------------------------------------- 1 | username 2 | password 3 | E:\OneDrive\文档\UCAS 4 | 16-17春季 -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 hrwhisper 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MyOCR.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2017/2/1 3 | # @Author : hrwhisper 4 | 5 | import os 6 | from sys import exit 7 | import re 8 | import subprocess 9 | from PIL import Image 10 | 11 | devnull = open(os.devnull, 'w') 12 | cut_size = 1 13 | 14 | 15 | def pre_process(func): 16 | def _wrapper(filename): 17 | image = Image.open(filename).point(lambda p: 255 if p > 127 else 0).convert("1") 18 | w, h = image.size 19 | image = image.crop((cut_size, cut_size, w - cut_size, h - cut_size)) 20 | save_name = filename # + '1.jpg' 21 | image.save(save_name) 22 | try: 23 | res = func(save_name) 24 | os.remove(save_name) 25 | return res 26 | except FileNotFoundError: 27 | print('请检查是否安装tesseract-OCR') 28 | os.remove(save_name) 29 | os.system("pause") 30 | exit(1) 31 | 32 | return _wrapper 33 | 34 | 35 | @pre_process 36 | def image_to_string(img): 37 | res = subprocess.check_output('tesseract ' + img + ' stdout', stderr=devnull).decode() # tesseract a.png result 38 | return (re.subn('\W', '', res.strip()) if res else ('', ''))[0].lower() 39 | 40 | 41 | if __name__ == '__main__': 42 | print(image_to_string('ucas_code1.jpg')) 43 | print(image_to_string('ucas_code2.jpg')) 44 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # UCAS 课件自动下载 2 | 3 | ## 使用方法 4 | 5 | 两种使用方法。 6 | 7 | ### 小白用户 8 | 9 | 该方法直接运行exe文件,exe文件可以在 https://github.com/youqingxiaozhua/Ucas_course_ppt_auto_download/releases 中下载 10 | 11 | 修改**private.txt**文件,然后双击运行main.exe即可 12 | 13 | ps: 14 | 15 | - private.txt与main.exe在**同一目录**下即可 16 | 17 | 18 | 19 | ### 高级用户 20 | 21 | 修改根目录的private文件,然后python main.py即可。 22 | 23 | 需要全部的环境(包括python),见下方环境要求,以及参考对应的安装方法 24 | 25 | > 可以设置alias实现快速调用,或者添加计划任务每天自动同步 26 | 27 | 28 | ### private文件说明 29 | 30 | private中,各行表示意义如下: 31 | 32 | 1. 第一行为登录选课系统的账号 33 | 2. 第二行为密码 34 | 3. 第三行为要保存的路径 35 | 4. 第四行为当前的学期,如16-17春季(没有则全部下载) 36 | 37 | 38 | 39 | ##环境要求 40 | 41 | - python 3.5.2 42 | - requests 2.11 43 | - BeautifulSoup 44 | - 可选环境: 45 | - PIL 46 | - Tesseract-OCR 47 | 48 | ### 安装方法 49 | - pip install beautifulsoup4 50 | - pip install requests 51 | - pip install Pillow 52 | - 登录网址默认为 http://onestop.ucas.ac.cn/home/index ,如果为这个网站挂了,将使用sep.ucas.as.cn 登录,当你在校外的时候那么需要在安装如下环境以支持验证码识别: 53 | - Tesseract-OCR 54 | - windows下安装:http://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-setup-3.05.00dev.exe 55 | - 安装时候勾选Registry settings 56 | - Linux \ MAC OS X安装见 https://github.com/tesseract-ocr/tesseract/wiki 57 | 58 | 59 | 60 | 61 | ## 其它 62 | 63 | - 暂时没有android / IOS的计划。 64 | - 建议云盘如OneDrive连用,这样在电脑上下载到OneDrive文件夹中,手机上也可以收到。 65 | - **觉得好用点个star吧~** 66 | 67 | ## 更新说明 68 | - 2020-2-14 新增升级提示 69 | - 2020-2-14 适配课程网站升级为HTTPS 70 | - 更新适配到2019年秋季 71 | - 新增登陆网址,不用验证码 72 | - 修复因为微软CMD下编码不一致导致程序crash 73 | - 支持最新验证码登录(校内校外不一致) 74 | - 校内不需要验证码,校外需要 75 | - 多线程下载 76 | - 自定义当前学期,只下载当前学期的课程PPT 77 | - 修复文件夹判断问题(有的老师课件命名没有'.') 78 | - 添加EXE执行程序(使用 PyInstaller 打包) 79 | - 修复课件名称含有空格导致解析失败问题 80 | - 修复课件里文件夹没有遍历下载的问题 81 | - 修复部分课程给出链接后下载失效(如计算机算法设计与分析,老师给出两个链接) 82 | 83 | 84 | -------------------------------------------------------------------------------- /LoginUCAS.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2017/2/1 3 | # @Author : hrwhisper 4 | import codecs 5 | import json 6 | import os 7 | import time 8 | from sys import exit 9 | import requests 10 | from MyOCR import image_to_string 11 | 12 | 13 | class UserNameOrPasswordError(Exception): 14 | pass 15 | 16 | 17 | class LoginUCAS(object): 18 | def __init__(self, use_onestop=True, vercode_save_name='certCode.jpg'): 19 | self.username, self.password = LoginUCAS._read_username_and_password() 20 | self.cnt = 0 21 | self.__BEAUTIFULSOUPPARSE = 'html5lib' # or use 'lxml' 22 | self.session = requests.session() 23 | self.vercode_save_name = vercode_save_name 24 | self.use_onestop = use_onestop 25 | self._init_login_url() 26 | 27 | def _init_login_url(self): 28 | if self.use_onestop: 29 | self._onestop_init() 30 | else: 31 | self._sep_init() 32 | 33 | def _onestop_init(self): 34 | self.url = { 35 | 'base_url': 'http://onestop.ucas.ac.cn/home/index', 36 | 'verification_code': None, 37 | 'login_url': 'http://onestop.ucas.ac.cn/Ajax/Login/0' 38 | } 39 | # self.session.get(self.url['base_url']) 40 | self.headers = { 41 | 'Host': 'onestop.ucas.ac.cn', 42 | "Connection": "keep-alive", 43 | 'Referer': 'http://onestop.ucas.ac.cn/home/index', 44 | 'X-Requested-With': 'XMLHttpRequest', 45 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36", 46 | } 47 | self.post_data = { 48 | "username": self.username, 49 | "password": self.password, 50 | "remember": 'checked', 51 | } 52 | 53 | def _sep_init(self): 54 | self.url = { 55 | 'base_url': 'http://sep.ucas.ac.cn/', 56 | 'verification_code': 'http://sep.ucas.ac.cn/changePic', 57 | 'login_url': "http://sep.ucas.ac.cn/slogin" 58 | } 59 | self.headers = { 60 | "Host": "sep.ucas.ac.cn", 61 | "Connection": "keep-alive", 62 | "Upgrade-Insecure-Requests": "1", 63 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36", 64 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", 65 | "Accept-Encoding": "gzip, deflate, sdch", 66 | "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4", 67 | } 68 | self.post_data = { 69 | "userName": self.username, 70 | "pwd": self.password, 71 | "sb": "sb", 72 | "rememberMe": 1, 73 | } 74 | 75 | @classmethod 76 | def _read_username_and_password(cls): 77 | with codecs.open('./private.txt', "r", "utf-8") as f: 78 | username = password = None 79 | for i, line in enumerate(f): 80 | if i == 0: 81 | line = bytes(line.encode('utf-8')) 82 | if line[:3] == codecs.BOM_UTF8: 83 | line = line[3:] 84 | username = line.decode('utf-8').strip() 85 | elif i == 1: 86 | password = line.strip() 87 | else: 88 | break 89 | return username, password 90 | 91 | def _download_verification_code(self): 92 | r = self.session.get(self.url['verification_code'], stream=True, headers=self.headers) 93 | with open(self.vercode_save_name, 'wb') as f: 94 | for chunk in r.iter_content(chunk_size=1024): 95 | if chunk: # filter out keep-alive new chunks 96 | f.write(chunk) 97 | f.flush() 98 | return self.vercode_save_name 99 | 100 | def _need_verification_code(self): 101 | r = self.session.get(self.url['base_url']) 102 | return r.text.find('验证码') != -1 103 | 104 | def login_sep(self): 105 | try: 106 | if not self.cnt: 107 | print('Login....' + self.url['base_url']) 108 | if self.use_onestop: 109 | html = self.session.post( 110 | self.url['login_url'], data=self.post_data, headers=self.headers).text 111 | res = json.loads(html) 112 | if not res['f']: 113 | raise UserNameOrPasswordError 114 | else: 115 | html = self.session.get(res['msg']).text 116 | print("登录成功 {}".format(self.cnt)) 117 | else: 118 | # 登录sep 119 | try: 120 | if self._need_verification_code(): 121 | cert_code = image_to_string(self._download_verification_code()) 122 | while not cert_code or len(cert_code) < 4: 123 | cert_code = image_to_string(self._download_verification_code()) 124 | self.post_data["certCode"] = cert_code 125 | html = self.session.post(self.url['login_url'], data=self.post_data, headers=self.headers).text 126 | if html.find('密码错误') != -1: 127 | raise UserNameOrPasswordError 128 | elif html.find('验证码错误') != -1: 129 | time.sleep(2) 130 | self.cnt += 1 131 | return self.login_sep() 132 | print("登录成功 {}".format(self.cnt)) 133 | except requests.exceptions.ConnectionError: 134 | print('请检查网络连接') 135 | exit(1) 136 | except UserNameOrPasswordError: 137 | print('用户名或者密码错误,请检查private文件') 138 | os.system("pause") 139 | exit(1) 140 | except requests.exceptions.ConnectionError: 141 | self.use_onestop = not self.use_onestop 142 | self._init_login_url() 143 | print("login time out, change to " + self.url['base_url']) 144 | self.cnt += 1 145 | if self.cnt > 20: 146 | print("估计是教务处挂了") 147 | exit(1) 148 | return self.login_sep() 149 | return self 150 | 151 | 152 | if __name__ == '__main__': 153 | LoginUCAS(True).login_sep() 154 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Date : 2016/9/9 3 | # @Author : hrwhisper 4 | import codecs 5 | import json 6 | import re 7 | import os 8 | import multiprocessing 9 | from multiprocessing.dummy import Pool 10 | from datetime import datetime 11 | import urllib.parse 12 | import requests 13 | from bs4 import BeautifulSoup 14 | from LoginUCAS import LoginUCAS 15 | 16 | 17 | class UCASCourse(object): 18 | def __init__(self, time_out=5, check_version=True): 19 | self.__BEAUTIFULSOUPPARSE = 'html.parser' # or use 'lxml' 20 | self.semester = None 21 | self.save_base_path, self.semester = UCASCourse._read_info_from_file() 22 | self.session = None 23 | self.headers = None 24 | self._init_session() 25 | self.course_list = [] 26 | self.to_download = [] 27 | self.lock = multiprocessing.Lock() 28 | self._time_out = time_out 29 | self.version = '1.4' 30 | self.check_version = check_version 31 | 32 | def _check_version(self): 33 | r = requests.get('https://api.github.com/repos/youqingxiaozhua/Ucas_course_ppt_auto_download/releases/latest') 34 | github_latest = json.loads(r.text) 35 | version = github_latest['name'] 36 | version_note = github_latest['body'] 37 | if version != self.version: 38 | print('\nA new version (v%s: %s) have been released, please download from this link:' % (version, version_note)) 39 | print('https://github.com/youqingxiaozhua/Ucas_course_ppt_auto_download/releases\n') 40 | 41 | def _init_session(self): 42 | t = LoginUCAS().login_sep() 43 | self.session = t.session 44 | self.headers = t.headers 45 | 46 | @classmethod 47 | def _read_info_from_file(cls): 48 | with codecs.open('./private.txt', "r", "utf-8") as f: 49 | save_base_path = semester = None 50 | for i, line in enumerate(f): 51 | if i < 2: continue 52 | if i == 2: 53 | save_base_path = line.strip() 54 | if i == 3: 55 | semester = line.strip() 56 | return save_base_path, semester 57 | 58 | def _get_course_page(self): 59 | # 从sep中获取Identity Key来登录课程系统,并获取课程信息 60 | url = "http://sep.ucas.ac.cn/portal/site/16/801" 61 | r = self.session.get(url, headers=self.headers) 62 | url = re.findall(r'', r.text)[0] 63 | 64 | self.headers['Host'] = "course.ucas.ac.cn" 65 | html = self.session.get(url, headers=self.headers).text 66 | return html 67 | 68 | def _parse_course_list(self): 69 | # 获取课程的所有URL 70 | html = self._get_course_page() 71 | self.course_list = ['https://course.ucas.ac.cn/portal/site/' + x for x in 72 | re.findall(r'https://course.ucas.ac.cn/portal/site/([\d]+)"', html)] 73 | 74 | def _get_all_resource_url(self): 75 | # 从课程的所有URL中获取对应的所有课件 76 | print('读取课件中......') 77 | base_url = 'https://course.ucas.ac.cn/access/content/group/' 78 | urls = [base_url + x.split('/')[-1] + '/' for x in self.course_list] 79 | list(map(self._get_resource_url, urls)) 80 | 81 | def _get_resource_url(self, base_url, _path='', source_name=None): 82 | html = self.session.get(base_url, headers=self.headers).text 83 | tds = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find_all('li') 84 | if not source_name: 85 | source_name = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('h3').text 86 | if self.semester and source_name.find(self.semester) == -1: return # download only current semester 87 | res = set() 88 | for td in tds: 89 | url = td.find('a') 90 | if not url: continue 91 | url = urllib.parse.unquote(url['href']) 92 | if url == '../': continue 93 | # if 'Folder' in td.text: # directory 94 | if 'folder' in td.attrs['class']: # directory 95 | # folder_name = td.text 96 | self._get_resource_url(base_url + url, _path + '/' + url, source_name) 97 | if url.startswith('http:__'): # Fix can't download when given a web link. eg: 计算机算法分析与设计 98 | try: 99 | res.add((self.session.get(base_url + url, headers=self.headers, timeout=self._time_out).url, _path)) 100 | except requests.exceptions.ReadTimeout: 101 | print("Error-----------: ", base_url + url, "添加进下载路径失败,服务器长时间无响应") 102 | except requests.exceptions.ConnectionError as e: 103 | print("Error-----------: ", base_url + url, "添加进下载路径失败,服务器长时间无响应") 104 | else: 105 | res.add((base_url + url, _path)) 106 | 107 | for url, _path in res: 108 | self.to_download.append((source_name, _path, url)) 109 | 110 | def _start_download(self): 111 | # 多线程下载 112 | p = Pool() 113 | p.map(self._download_file, self.to_download) 114 | p.close() 115 | p.join() 116 | 117 | def _download_file(self, param): 118 | # 下载文件 119 | dic_name, sub_directory, url = param 120 | save_path = self.save_base_path + '/' + dic_name + '/' + sub_directory 121 | with self.lock: 122 | if not os.path.exists(save_path): # To create directory 123 | os.makedirs(save_path) 124 | 125 | filename = url.split('/')[-1] 126 | save_path += '/' + filename 127 | if not os.path.exists(save_path): # To prevent download exists files 128 | try: 129 | r = self.session.get(url, stream=True, timeout=self._time_out) 130 | except requests.exceptions.ReadTimeout as e: 131 | print('Error-----------文件下载失败,服务器长时间无响应: ', save_path) 132 | except requests.exceptions.ConnectionError as e: 133 | print('Error-----------文件下载失败,服务器长时间无响应: ', save_path) 134 | 135 | try: 136 | # HTML file does not have Content Length attr 137 | size_mb = int(r.headers.get('Content-Length')) / (1024 ** 2) 138 | except TypeError: 139 | size_mb = 0.33 # html文件直接指定大小 :) 140 | try: 141 | # print('Start download {dic_name} >> {sub_directory}{filename} {size_mb:.2f}MB'.format(**locals())) 142 | with open(save_path, 'wb') as f: 143 | for chunk in r.iter_content(chunk_size=1024): 144 | if chunk: # filter out keep-alive new chunks 145 | f.write(chunk) 146 | f.flush() 147 | print('{dic_name} >> {sub_directory}{filename} Download success'.format(**locals())) 148 | except UnicodeEncodeError: 149 | print('{dic_name} >> {sub_directory} Download a file'.format(**locals())) 150 | 151 | def start(self): 152 | if self.check_version: 153 | self._check_version() 154 | self._parse_course_list() 155 | self._get_all_resource_url() 156 | self._start_download() 157 | 158 | 159 | if __name__ == '__main__': 160 | base_path = os.path.dirname(os.path.abspath(__file__)) 161 | os.chdir(base_path) 162 | start = datetime.now() 163 | s = UCASCourse() 164 | s.start() 165 | print('Task complete, total time:', datetime.now() - start) 166 | os.system("pause") 167 | --------------------------------------------------------------------------------