# Repository layout:
#   Analyzer.py
#   LICENSE
#   README.md
#   config.json
#   example/res_example.jpg
#   getLessonDict.py
#   jsReader.py
#   main.py
#
# ---------------------------------------------------------------------------
# /Analyzer.py
# ---------------------------------------------------------------------------
# -*- coding:utf-8 -*-

import json
from os import listdir, path, mkdir


class Analyzer():
    """Rank crawled course snapshots and export them to ./res/res.xls.

    Each ``./data/data_*.json`` file becomes one worksheet. Lessons are sorted
    by ``sc / lc`` (students enrolled / seats offered), highest first.
    """

    def __init__(self):
        """Create ``./res`` if it is missing and start an empty workbook."""
        # Imported here so the ranking logic can be imported and exercised
        # on machines that do not have xlwt installed.
        import xlwt

        if not path.exists('./res'):
            mkdir('res')
        self.wb = xlwt.Workbook()

    def writeExcel(self, path, rankingList):
        """Add one worksheet holding ``rankingList`` to the workbook.

        :param path: snapshot path of the form ``./data/data_YYYY-MM-DD_HH:MM.json``.
        :param rankingList: rows as returned by :meth:`analyzeJSON`.
        """
        # path[17:-5] drops the './data/data_YYYY-' prefix and the '.json'
        # suffix, leaving 'MM-DD_HH:MM'; ':' is replaced before the text is
        # used as the sheet name.
        ws = self.wb.add_sheet(path[17:-5].replace(':', '_'))

        for row, lesson in enumerate(rankingList):
            for column, value in enumerate(lesson):
                ws.write(row, column, value)

    def analyzeJSON(self, path):
        """Load one snapshot and return its lessons ranked by ``sc / lc``.

        :param path: path of a JSON file mapping lesson number -> lesson info
            (the keys ``name``, ``teachers``, ``sc`` and ``lc`` are read).
        :return: list of tuples
            ``(lesson number, name, teachers, sc, lc, sc / lc)`` sorted by the
            last field in descending order. Lessons whose ``lc`` is 0 are
            left out because the ratio is undefined for them.
        """
        # main.py writes the snapshots as UTF-8 with ensure_ascii=False, so
        # they must be read back as UTF-8 regardless of the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            lessonsDict = json.load(f)

        rankingList = [
            (
                no,
                info['name'],
                info['teachers'].replace(',', ' '),
                info['sc'],
                info['lc'],
                int(info['sc']) / int(info['lc']),
            )
            for no, info in lessonsDict.items()
            if int(info['lc']) != 0
        ]
        rankingList.sort(key=lambda lesson: lesson[-1], reverse=True)
        return rankingList

    def analyzeAndWriteAll(self):
        """Rank every ``./data/data_*`` snapshot and save ``./res/res.xls``."""
        # Sorted file names put the sheets in chronological order because
        # the names embed a zero-padded timestamp.
        fileList = sorted(file for file in listdir('./data') if file.startswith('data_'))
        for file in fileList:
            snapshot = './data/' + file
            self.writeExcel(snapshot, self.analyzeJSON(snapshot))
        self.wb.save('./res/res.xls')


if __name__ == '__main__':
    # Built only when run as a script, so importing this module no longer
    # creates ./res or a workbook as a side effect.
    analyzer = Analyzer()
    analyzer.analyzeAndWriteAll()

# ---------------------------------------------------------------------------
# /LICENSE
# ---------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2017 CLDXiang
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 复旦大学选课 - 课程受欢迎程度排行 2 | ========================== 3 | [![license](https://img.shields.io/github/license/mashape/apistatus.svg)]() 4 | 5 | 6 | 一个基于Requests库的小爬虫,可以爬取当前选课系统中的课程信息(缺省设置仅包括思政、七模、英语及军理),并对爬取到的选课量进行简单处理得到课程受欢迎程度排行。 7 | 8 | ![Res Example](https://github.com/CLDXiang/FDUCourseRanking/blob/master/example/res_example.jpg) 9 | 10 | 11 | ## 依赖 12 | 13 | * Bash(或其它可运行python脚本的工具) 14 | * [Python3](https://www.python.org/downloads/release/python-363/) 15 | * [Requests](http://docs.python-requests.org/zh_CN/latest/user/quickstart.html) 16 | * [xlwt](https://pypi.python.org/pypi/xlwt) 17 | 18 | 推荐使用pip安装Requests和xlwt库 19 | 20 | ```bash 21 | pip3 install requests 22 | pip3 install xlwt 23 | ``` 24 | 25 | ## 快速开始 26 | 27 | #### 下载源码 28 | 29 | ```bash 30 | git clone https://github.com/CLDXiang/FDUCourseRanking.git 31 | cd FDUCourseRanking 32 | ``` 33 | 34 | #### 配置信息 35 | 36 | 进入```config.json```,分别将```username```和```password```的值填写为你的学号和密码(用于登录选课系统),如: 37 | ```json 38 | { 39 | "username": 16302333333, 40 | "password": "LongMayTheSunshine" 41 | } 42 | ``` 43 | 44 | 如果你熟悉(或觉得能看懂)JSON的语法,也可以在```config.json```自行配置想要进行搜索的课程,其中```lessonNo```、```courseCode```、```courseName```分别对应选课界面的课程序号、课程代码和课程名称,填写时注意满足选课系统的搜索条件(如最低字数)。 45 | 46 | #### 爬取课程信息 47 | 48 | ```bash 49 | python3 main.py 50 | ``` 51 | 或使用你自己的工具执行```main.py``` 52 | 53 | ```main.py```会一直运行下去,每一小时爬取一次```config.json```中设置的课程信息,并将其以JSON格式存到```./data```目录下。你可以随时终止进程。 54 | 55 | #### 获取排行 56 | 57 | ```bash 58 | python3 Analyzer.py 59 | ``` 60 | 或使用你自己的工具执行```Analyzer.py``` 61 | 62 | ```Analyzer.py```会自动读取```./data```中所有包含课程信息的JSON文件,并按照设定好的评价指标(缺省设置为选课人数/开课名额)将排行榜写入excel工作表。 63 | 64 | 输出的excel文件```res.xls```位于```./res```目录。每一个工作表代表一次爬取的结果(即```./data```目录中的一个JSON文件)。 65 | 66 | 每一行的内容依次为:课程代码,课程名称,教师,选课人数,开课名额,选课人数/开课名额 67 | 68 | ## 注意 69 | 70 
| * 私以为第一轮选课的数据更具有参考价值,故建议仅在第一轮选课期间使用。 71 | * 每一次选课系统重新开放时都会有一些调整,导致代码无法继续使用,我应该会尽快更新代码(不排除弃坑的可能)。 72 | * 如果你不再使用这个脚本,请记得及时删除```config.json```文件中自己的学号密码信息,保护好自己的隐私。 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": , 3 | "password": "", 4 | "searchList": [ 5 | { 6 | "lessonNo": "", 7 | "courseCode": "ENGL1100", 8 | "courseName": "", 9 | "searchType": 0 10 | }, 11 | { 12 | "lessonNo": "", 13 | "courseCode": "PTSS1100", 14 | "courseName": "", 15 | "searchType": 0 16 | }, 17 | { 18 | "lessonNo": "", 19 | "courseCode": "HIST1190", 20 | "courseName": "", 21 | "searchType": 0 22 | }, 23 | { 24 | "lessonNo": "", 25 | "courseCode": "PHIL1190", 26 | "courseName": "", 27 | "searchType": 0 28 | }, 29 | { 30 | "lessonNo": "", 31 | "courseCode": "POLI1190", 32 | "courseName": "", 33 | "searchType": 0 34 | }, 35 | { 36 | "lessonNo": "", 37 | "courseCode": "ECON1190", 38 | "courseName": "", 39 | "searchType": 0 40 | }, 41 | { 42 | "lessonNo": "", 43 | "courseCode": "JOUR1190", 44 | "courseName": "", 45 | "searchType": 0 46 | }, 47 | { 48 | "lessonNo": "", 49 | "courseCode": "LAWS1190", 50 | "courseName": "", 51 | "searchType": 0 52 | }, 53 | { 54 | "lessonNo": "", 55 | "courseCode": "COMP1190", 56 | "courseName": "", 57 | "searchType": 0 58 | }, 59 | { 60 | "lessonNo": "", 61 | "courseCode": "INFO1190", 62 | "courseName": "", 63 | "searchType": 0 64 | }, 65 | { 66 | "lessonNo": "", 67 | "courseCode": "MACR1190", 68 | "courseName": "", 69 | "searchType": 0 70 | }, 71 | { 72 | "lessonNo": "", 73 | "courseCode": "MECH1190", 74 | "courseName": "", 75 | "searchType": 0 76 | }, 77 | { 78 | "lessonNo": "", 79 | "courseCode": "PHYS1190", 80 | "courseName": "", 81 | "searchType": 0 82 | }, 83 | { 84 | "lessonNo": "", 85 | "courseCode": "SOFT1190", 86 | "courseName": "", 87 | "searchType": 0 88 | }, 89 | { 90 | 
"lessonNo": "", 91 | "courseCode": "TCPH1190", 92 | "courseName": "", 93 | "searchType": 0 94 | }, 95 | { 96 | "lessonNo": "", 97 | "courseCode": "BIOL1190", 98 | "courseName": "", 99 | "searchType": 0 100 | }, 101 | { 102 | "lessonNo": "", 103 | "courseCode": "ENVI1190", 104 | "courseName": "", 105 | "searchType": 0 106 | }, 107 | { 108 | "lessonNo": "", 109 | "courseCode": "PHAR1190", 110 | "courseName": "", 111 | "searchType": 0 112 | }, 113 | { 114 | "lessonNo": "", 115 | "courseCode": "PHPM1190", 116 | "courseName": "", 117 | "searchType": 0 118 | }, 119 | { 120 | "lessonNo": "", 121 | "courseCode": "PTSS1190", 122 | "courseName": "", 123 | "searchType": 0 124 | }, 125 | { 126 | "lessonNo": "", 127 | "courseCode": "FINE1190", 128 | "courseName": "", 129 | "searchType": 0 130 | }, 131 | { 132 | "lessonNo": "", 133 | "courseCode": "MUSE1190", 134 | "courseName": "", 135 | "searchType": 0 136 | }, 137 | { 138 | "lessonNo": "", 139 | "courseCode": "FINE1100", 140 | "courseName": "", 141 | "searchType": 0 142 | }, 143 | { 144 | "lessonNo": "", 145 | "courseCode": "NDEC1100", 146 | "courseName": "", 147 | "searchType": 0 148 | }, 149 | { 150 | "lessonNo": "", 151 | "courseCode": "FORE1100", 152 | "courseName": "", 153 | "searchType": 0 154 | }, 155 | { 156 | "lessonNo": "", 157 | "courseCode": "CHIN1190", 158 | "courseName": "", 159 | "searchType": 0 160 | } 161 | ], 162 | "passKeyword": [ 163 | ] 164 | } -------------------------------------------------------------------------------- /example/res_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLDXiang/FDUCourseRanking/d9b6ddf3288321c43291ea1c778f8e95ce681ba1/example/res_example.jpg -------------------------------------------------------------------------------- /getLessonDict.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import requests 4 | import json 5 | from 
# ---------------------------------------------------------------------------
# /getLessonDict.py (continued; `import requests` and `import json` come
# earlier in this file)
# ---------------------------------------------------------------------------
from time import sleep


def getLessonsDict():
    """Log in to the course-selection site and fetch the configured lessons.

    Reads ``config.json`` from the working directory (``username``,
    ``password``, ``searchList``, ``passKeyword`` and, optionally,
    ``profileId``), walks through the login / pre-selection / selection pages
    and then posts one lesson query per ``searchList`` entry. Every reply is
    saved to ``res.js`` and parsed by ``jsReader.js2Dict``.

    Side effects: writes ``res1.html``, ``res2.html``, ``res3.html`` and
    ``res.js`` (plus whatever ``js2Dict`` writes) into the working directory,
    prints progress, and sleeps 5 seconds before every request.

    :return: dict mapping lesson number -> lesson info for every lesson that
        matches its search entry and contains none of the ``passKeyword``
        strings in its name.
    """
    # Only needed while a crawl is running; keeps this module importable on
    # its own.
    from jsReader import js2Dict

    lessonsDict = {}
    # JSON files are UTF-8; do not depend on the locale default encoding
    # (courseName / passKeyword may hold non-ASCII text).
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    session_requests = requests.session()

    username = str(config['username'])
    password = str(config['password'])
    # The election profile id was hard-coded as 724 in two places. It can now
    # be overridden with an optional "profileId" key; the default keeps the
    # original requests unchanged.
    profile_id = str(config.get('profileId', '724'))

    # NOTE(review): the 'Content-Length' values below look like they were
    # copied from a captured browser session; requests normally derives this
    # header from the body itself - confirm they are still needed.

    # Log in
    sleep(5)
    print('正在登录选课系统...')
    res1 = session_requests.post(
        url='http://xk.fudan.edu.cn/xk/login.action',
        headers={
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Origin': 'http',
            'Referer': 'http',
            'Upgrade-Insecure-Requests': '1',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
            'Host': 'xk.fudan.edu.cn',
            'Content-Length': '94',
        },
        data={
            'username': username,
            'password': password,
            'encodedPassword': '',
            'session_locale': 'zh_CN',
        },
    )
    # NOTE(review): the reply is not inspected, so this message is printed
    # even when the login was rejected.
    print('登录成功')

    with open('res1.html', 'wb') as f:
        f.write(res1.content)

    # Page shown before the course-selection page
    sleep(5)
    print('正在进入选课前页...')
    res2 = session_requests.post(
        url='http://xk.fudan.edu.cn/xk/stdElectCourse!innerIndex.action',
        headers={
            'Host': 'xk.fudan.edu.cn',
            'Connection': 'keep-alive',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3260.0 Safari/537.36',
            'Upgrade-Insecure-Requests': '1',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        },
    )
    print('进入选课前页成功')

    with open('res2.html', 'wb') as f:
        f.write(res2.content)

    # Course-selection page
    sleep(5)
    print('正在进入选课界面...')
    res3 = session_requests.post(
        url='http://xk.fudan.edu.cn/xk/stdElectCourse!defaultPage.action',
        headers={
            'Host': 'xk.fudan.edu.cn',
            'Connection': 'keep-alive',
            'Content-Length': '22',
            'Cache-Control': 'max-age=0',
            'Origin': 'http://xk.fudan.edu.cn',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3260.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Referer': 'http://xk.fudan.edu.cn/xk/stdElectCourse!innerIndex.action',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        },
        data={'electionProfile.id': profile_id},
    )
    print('进入选课界面成功')

    with open('res3.html', 'wb') as f:
        f.write(res3.content)

    print('正在请求课程信息...')

    searches = config['searchList']
    for cnt, search in enumerate(searches, 1):
        print('正在请求{} {}/{}'.format(search['courseCode'], str(cnt), str(len(searches))))
        # Query the lessons for this search entry
        sleep(5)
        res = session_requests.post(
            url='http://xk.fudan.edu.cn/xk/stdElectCourse!queryLesson.action?profileId=' + profile_id,
            headers={
                'Host': 'xk.fudan.edu.cn',
                'Connection': 'keep-alive',
                'Content-Length': '39',
                'Accept': '*/*',
                'Origin': 'http://xk.fudan.edu.cn',
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3212.0 Safari/537.36',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': 'http://xk.fudan.edu.cn/xk/stdElectCourse!defaultPage.action',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            },
            data={
                'lessonNo': str(search['lessonNo']),
                'courseCode': str(search['courseCode']),
                'courseName': str(search['courseName']),
            },
        )

        # js2Dict() reads the reply back from this file.
        with open('res.js', 'wb') as f:
            f.write(res.content)

        newLessonsDict = js2Dict()

        wanted_no = str(search['lessonNo'])
        wanted_code = str(search['courseCode'])
        wanted_name = str(search['courseName'])
        for no, lesson in newLessonsDict.items():
            # Keep only lessons that really contain every search term
            # (an empty term matches everything) ...
            if wanted_no not in str(lesson['no']):
                continue
            if wanted_code not in str(lesson['code']):
                continue
            if wanted_name not in str(lesson['name']):
                continue
            # ... and whose name holds none of the excluded keywords.
            if any(keyword in str(lesson['name']) for keyword in config['passKeyword']):
                continue
            lessonsDict[no] = lesson

    print('请求课程信息完成')
    return lessonsDict


if __name__ == '__main__':
    print(getLessonsDict())

# ---------------------------------------------------------------------------
# /jsReader.py
# ---------------------------------------------------------------------------
# -*- coding:utf-8 -*-

import re
import json


def _quote_keys(js_text, keys):
    """Convert a JS object literal with bare keys and '...' strings to JSON.

    A key is quoted only where it stands in key position (directly after
    ``{`` or ``,``), so the same letters inside a string value are left alone.
    """
    pattern = re.compile(r'([{,]\s*)(' + '|'.join(re.escape(key) for key in keys) + r')\s*:')
    return pattern.sub(r'\1"\2":', js_text).replace("'", '"')


def js2Dict():
    """Parse the lesson query reply stored in ``res.js``.

    The first line of ``res.js`` must contain the lesson list (``[{...}]``)
    and the second line the per-lesson counters (``{...}`` holding ``sc`` and
    ``lc`` for each lesson id). Both are JavaScript literals, so they are
    converted to JSON before parsing.

    Side effects: the converted texts are written to ``lessonJSONs.json`` and
    ``lessonId2Counts.json`` in the working directory.

    :return: dict mapping lesson number (``no``) -> lesson info, each lesson
        extended with its ``sc`` and ``lc`` counters.
    :raises ValueError: if ``res.js`` does not contain the two literals.
    :raises KeyError: if a lesson id has no entry in the counters.
    """
    key_list = ['id', 'no', 'name', 'code', 'credits', 'courseId', 'examTime', 'startWeek', 'endWeek', 'courseTypeId',
                'courseTypeName', 'courseTypeCode', 'scheduled', 'hasTextBook', 'period', 'weekHour', 'withdrawable',
                'textbooks', 'teachers', 'campusCode', 'campusName', 'remark', 'arrangeInfo', 'weekDay', 'weekState',
                'startUnit', 'endUnit', 'weekStateDigest', 'rooms']

    # res.js holds the raw response bytes. They are decoded as UTF-8 instead
    # of with the locale default - presumably what the site sends; verify if
    # names come out garbled.
    with open('res.js', 'r', encoding='utf-8') as f:
        text1 = f.readline()
        text2 = f.readline()

    lessons_match = re.search(r'\[\{.+\}\]', text1)
    counts_match = re.search(r'\{.+\}', text2)
    if lessons_match is None or counts_match is None:
        # Typically an HTML page (e.g. after a failed login) instead of the
        # expected script; fail with a message that says so.
        raise ValueError('res.js does not contain the expected lesson list and counters')

    lessonJSONs = _quote_keys(lessons_match.group(0), key_list)
    lessonId2Counts = _quote_keys(counts_match.group(0), ['sc', 'lc'])

    # Snapshots of the converted data, kept for inspection.
    with open('lessonJSONs.json', 'w', encoding='utf-8') as f:
        f.write(lessonJSONs)

    with open('lessonId2Counts.json', 'w', encoding='utf-8') as f:
        f.write(lessonId2Counts)

    lessons = json.loads(lessonJSONs)
    counts = json.loads(lessonId2Counts)

    for lesson in lessons:
        count = counts[str(lesson['id'])]
        lesson['sc'] = count['sc']
        lesson['lc'] = count['lc']

    # key: lesson number ('no'), value: lesson info
    return {lesson['no']: lesson for lesson in lessons}

# ---------------------------------------------------------------------------
# /main.py
# ---------------------------------------------------------------------------
# -*- coding:utf-8 -*-

import json
import traceback
from time import sleep
from datetime import datetime
from os import path, mkdir


def main(test=False):
    """Crawl the lesson data in an endless loop and store it under ./data.

    Each successful round writes ``./data/data_<YYYY-MM-DD_HH:MM>.json`` and
    then waits one hour. A failed round prints the traceback and is retried
    after 15 minutes. The loop ends only when the process is interrupted.

    :param test: accepted for compatibility; not used.
    """
    # Loaded here so the crawler is only imported when a crawl is started.
    from getLessonDict import getLessonsDict

    print('<--选课量统计程序启动-->')
    print('版本号v1.0.0')
    if not path.exists('./data'):
        mkdir('data')
    while True:
        try:
            print(datetime.now(), '开始爬取选课量...')
            lessonsDict = getLessonsDict()
            lessonsJson = json.dumps(lessonsDict, ensure_ascii=False)
            # Taken once so the printed name and the written file agree even
            # when the minute changes in between.
            # NOTE(review): ':' in the file name is not portable to Windows;
            # Analyzer.py slices this exact name format, so it is left as is.
            stamp = datetime.now().strftime('%Y-%m-%d_%H:%M')
            print('正在写入data_{}.json'.format(stamp))
            with open('./data/data_{}.json'.format(stamp), 'w', encoding='utf-8') as f:
                f.write(lessonsJson)
            print(datetime.now(), '写入完成,等待一小时...')
            sleep(3600)
        except Exception:
            # `except Exception` (not a bare except) lets Ctrl+C / SystemExit
            # stop the program; the traceback shows why the round failed.
            traceback.print_exc()
            sleep(900)


if __name__ == '__main__':
    main(test=True)