├── .github └── workflows │ └── unittest.yml ├── .gitignore ├── LICENSE ├── README.md ├── client.py ├── config.py ├── crypto.py ├── exceptions.py ├── format_tool.py ├── main.py ├── patterns.py ├── requirements.txt ├── sso.py └── test.py /.github/workflows/unittest.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.10 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: "3.10" 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 27 | - name: Test with unittest 28 | run: | 29 | python test.py 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | config.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Dr-Bluemond 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | 
furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BuaaBykcCrawler 2 | 北航博雅课程 Python + requests 爬虫接口。 3 | 4 | 提供最基本的登录,查询,选课,退选接口。 5 | 6 | 你可以在此之上开发自己的程序。 7 | 8 | 2022年9月6日更新,适配了新的接口协议。 9 | 10 | 2022年3月3日更新,适配了新的加密模式。 11 | 12 | # Usage(demo) 13 | 14 | 1. 安装 python 15 | 16 | 2. 安装依赖包,在终端输入以下命令,如果出现报错请咨询你的计算机专业的朋友。 17 | 18 | ``` 19 | pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 20 | ``` 21 | 22 | 3. 输入以下指令,运行main.py。第一次运行会生成一个config.json并直接退出。 23 | 24 | ``` 25 | python3 main.py 26 | ``` 27 | 28 | 4. 填写config.json中的username和password字段。 29 | 30 | 5. 
class Client:
    """
    Client for the course-selection API served under ``root``.

    Owns a ``requests`` session plus the auth token obtained through SSO and
    exposes one method per remote endpoint. Every endpoint call is routed
    through :meth:`_call_api`, which encrypts and signs the request and
    decrypts the response.
    """

    # Safety cap for the manual redirect walk in `login`, so that a redirect
    # cycle cannot loop forever.
    _MAX_LOGIN_REDIRECTS = 20

    # HTTP status codes for which `login` follows the `Location` header.
    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)

    # Known fragments of the server's `errmsg` and the exception each maps to.
    # Checked in order; the fragment itself is used as the exception message.
    _API_ERRORS = (
        ('已报名过该课程,请不要重复报名', AlreadyChosen),
        ('选课失败,该课程不可选择', FailedToChoose),
        ('报名失败,该课程人数已满!', FailedToChoose),
        ('退选失败,未找到退选课程或已超过退选时间', FailedToDelChosen),
    )

    def __init__(self, config: Config):
        """
        :param config: source of username/password and the cached token;
            a fresh token is written back to it after a successful login
        """
        self.config: Config = config
        self.session: Optional[requests.Session] = None
        self.token: Optional[str] = None

    def soft_login(self):
        """
        first try to login with token that is stored in config file, if failed, login with username and password
        :return: True when either the cached token or the full login worked
        """
        if self.config.token:
            self.token = self.config.token
            if self.session is None:
                self.session = requests.Session()
            try:
                result = self.get_user_profile()
                if result['employeeId'] == self.config.username:
                    print("soft login success")
                    return True
            except Exception:
                # Deliberate best effort: whatever goes wrong with the cached
                # token (expired, malformed reply, network error), the answer
                # is the same -- fall through to a full SSO login below.
                pass
        return self.login()

    def login(self):
        """
        login through sso
        :return: True once a token has been extracted and stored
        :raise LoginError: if the redirect chain ends without a token
        """
        # Function-scope import: only this method needs it.
        from urllib.parse import urljoin

        if self.session is None:
            self.session = requests.Session()
        service_url = root + "/casLogin"
        url = SsoApi(self.session, self.config.username, self.config.password).login_sso(service_url)
        for _ in range(self._MAX_LOGIN_REDIRECTS):
            # Redirects are followed by hand so every intermediate URL can be
            # inspected for the token.
            resp = self.session.get(url, allow_redirects=False)
            found = patterns.token.search(url)
            if found:
                self.token = found.group(1)
                print('login success')
                self.config.token = self.token
                return True
            if resp.status_code not in self._REDIRECT_STATUSES:
                raise LoginError(f"login failed: unexpected http status code {resp.status_code}")
            location = resp.headers.get('Location')
            if not location:
                raise LoginError("login failed: redirect response carries no Location header")
            # `Location` may be relative; resolve it against the current URL.
            url = urljoin(url, location)
        raise LoginError("login failed: too many redirects")

    def logout(self):
        """
        clear session and logout; safe to call when not logged in
        """
        if self.session is not None:
            self.session.close()
        self.token = None
        self.session = None

    def _call_api(self, api_name: str, data: dict):
        """
        an intermediate method to call api which deals with crypto and auth
        :param api_name: could be found in `app.js`. To find this file, open the network tab in devtools, refresh the page
        :param data: could also be found in `app.js`
        :return: raw data returned by the api
        :raise LoginError: if not logged in, or the response cannot be decrypted
        :raise ApiError: on a non-200 http status, an unparsable body, or a non-zero api status
        :raise AlreadyChosen: if the server reports the course is already chosen
        :raise FailedToChoose: if the server refuses the selection
        :raise FailedToDelChosen: if the server refuses the withdrawal
        """
        if self.session is None:
            raise LoginError("you must call `login` or `soft_login` before calling other apis")
        url = root + '/' + api_name
        data_str = json.dumps(data).encode()
        # A fresh AES key per request; the body is encrypted with it and the
        # key itself travels RSA-encrypted in the `ak` header.
        aes_key = generate_aes_key()
        ak = rsa_encrypt(aes_key)
        # `sk` is the RSA-encrypted signature of the plaintext payload.
        data_sign = sign(data_str)
        sk = rsa_encrypt(data_sign)
        ts = str(int(time.time() * 1000))  # millisecond timestamp

        data_encrypted = base64.b64encode(aes_encrypt(data_str, aes_key))
        headers = {
            'Content-Type': 'application/json;charset=utf-8',
            'User-Agent': ua,
            'auth_token': self.token,
            'authtoken': self.token,
            'ak': ak.decode(),
            'sk': sk.decode(),
            'ts': ts,
        }

        resp = self.session.post(url, data=data_encrypted, headers=headers)
        text = resp.content
        if resp.status_code != 200:
            raise ApiError(f"server panics with http status code: {resp.status_code}")
        try:
            message_decode_b64 = base64.b64decode(text)
        except binascii.Error as err:
            raise ApiError(f"unable to parse response: {text}") from err

        try:
            # The response is decrypted with the same per-request AES key.
            api_resp = json.loads(aes_decrypt(message_decode_b64, aes_key))
        except ValueError as err:
            raise LoginError("failed to decrypt response, it's usually because your login has expired") from err

        if api_resp['status'] != '0':
            # `errmsg` may be absent or null; normalise it so the real API
            # error is reported instead of a KeyError/AttributeError.
            errmsg = str(api_resp.get('errmsg') or '')
            for fragment, exc_type in self._API_ERRORS:
                if fragment in errmsg:
                    raise exc_type(fragment)
            print(api_resp)
            raise ApiError(f"server returns a non zero api status code: {api_resp['status']}")
        return api_resp['data']

    def get_user_profile(self):
        """
        get your profile
        :return: an object contains your profile
        """
        return self._call_api('getUserProfile', {})

    def query_student_semester_course_by_page(self, page_number: int, page_size: int):
        """
        query all recent courses
        :param page_number: page number
        :param page_size: page size
        :return: an object contains a list of courses and a total count
        """
        return self._call_api(
            'queryStudentSemesterCourseByPage',
            {'pageNumber': page_number, 'pageSize': page_size},
        )

    def query_fore_course(self):
        """
        deprecated, use `query_student_semester_course_by_page` instead
        :return: raw data of the `queryForeCourse` api
        """
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn('this api is not officially supported by bykc system, \n'
                      'use query_student_semester_course_by_page instead', DeprecationWarning, stacklevel=2)
        return self._call_api('queryForeCourse', {})

    def query_selectable_course(self):
        """
        deprecated, use `query_student_semester_course_by_page` instead
        :return: raw data of the `querySelectableCourse` api
        """
        warnings.warn('this api is not officially supported by bykc system, \n'
                      'use query_student_semester_course_by_page instead', DeprecationWarning, stacklevel=2)
        return self._call_api('querySelectableCourse', {})

    def get_all_config(self):
        """
        :return: all config contains campus, college, role, semester, term
        """
        return self._call_api('getAllConfig', {})

    @overload
    def query_chosen_course(self):
        """
        :return: the chosen courses of current semester
        """
        ...

    @overload
    def query_chosen_course(self, semester_id: int):
        """
        :param semester_id: the semester id, could be obtained from `get_all_config`
        :return: the chosen courses of the semester
        """
        ...

    @overload
    def query_chosen_course(self, start_date: datetime.datetime, end_date: datetime.datetime):
        """
        :param start_date: the start date to query
        :param end_date: the end date to query
        :return: the chosen courses in the period
        """
        ...

    def query_chosen_course(self, arg0=None, arg1=None):  # get chosen courses in the specified time range
        """
        query chosen courses, see the overloads for the accepted call shapes
        :param arg0: nothing (current semester), a semester id, or the start date
        :param arg1: the end date, required when `arg0` is a start date
        :return: raw data of the `queryChosenCourse` api
        :raise ValueError: if `arg0` is a semester id the server does not list
        :raise TypeError: if a start date is given without an end date
        """
        if arg0 is None or isinstance(arg0, int):
            semesters = self.get_all_config()['semester']
            if arg0 is None:
                # NOTE(review): the first listed semester is taken to be the
                # current one -- confirm against the server's ordering.
                semester = semesters[0]
            else:
                semester = next((s for s in semesters if s['id'] == arg0), None)
                if semester is None:
                    raise ValueError(f"no such semester: {arg0}")
            data = {
                "startDate": semester['semesterStartDate'],
                "endDate": semester['semesterEndDate'],
            }
        else:
            if arg1 is None:
                raise TypeError("end_date is required when start_date is given")
            data = {
                "startDate": arg0.strftime("%Y-%m-%d %H:%M:%S"),
                "endDate": arg1.strftime("%Y-%m-%d %H:%M:%S"),
            }
        return self._call_api('queryChosenCourse', data)

    def chose_course(self, course_id: int):
        """
        choose a course
        :param course_id: the course id, could be obtained from `query_student_semester_course_by_page`
        :return: some useless data if success
        :raise AlreadyChosen: if the course has already been chosen
        :raise FailedToChoose: if failed to choose the course
        """
        return self._call_api('choseCourse', {'courseId': course_id})

    def del_chosen_course(self, course_id: int):
        """
        delete a chosen course
        :param course_id: the course id, could be obtained from `query_chosen_course`
        :return: some useless data if success
        :raise FailedToDelChosen: if failed to delete the chosen course
        """
        return self._call_api('delChosenCourse', {'id': course_id})
def generate_aes_key() -> bytes:
    """
    Generate a fresh random AES key.

    The key is 16 characters drawn from a fixed alphanumeric alphabet and
    returned ASCII-encoded, i.e. 16 bytes.

    :return: the 16-byte key
    """
    # Function-scope import keeps this block self-contained. `secrets` draws
    # from the OS CSPRNG; the `random` module is predictable and must not be
    # used for key material.
    import secrets

    alphabet = 'ABCDEFGHJKMNPQRSTWXYZabcdefhijkmnprstwxyz2345678'
    return "".join(secrets.choice(alphabet) for _ in range(16)).encode()
def sign(message: bytes) -> bytes:
    """
    Compute the signature of a request payload.

    The signature is the SHA-1 digest of *message*, rendered as lowercase
    hexadecimal and returned as ASCII bytes (40 bytes long).

    :param message: the raw payload to sign
    :return: lowercase hex SHA-1 digest, as bytes
    """
    # Function-scope import keeps this block self-contained. A plain digest
    # needs neither the hazmat layer nor a base16-then-lowercase round trip;
    # `hexdigest()` already yields lowercase hex.
    import hashlib

    return hashlib.sha1(message).hexdigest().encode('ascii')
import re

# Captures everything that follows "token=" in a URL (group 1).
token = re.compile(r"token=(.*)")

# Captures the value of the <input> element named "execution" (group 1).
# The previous pattern was empty and had no capture group, so any caller
# asking for `.group(1)` could never succeed.
# NOTE(review): assumes the SSO login page carries the flow id as
# <input ... name="execution" ... value="..."> -- confirm against the live page.
execution = re.compile(r'<input[^>]*\bname="execution"[^>]*\bvalue="([^"]*)"')
def login_sso(self, url):
    """
    BUAA unified authentication (SSO) login.

    :param url: the service URL handed to SSO; it tells SSO which site is
        logging in and therefore what kind of token to return
    :return: the redirect target returned by SSO, a URL carrying a ticket.
        Visiting it usually stores the credential in cookies or storage;
        each site handles this differently
    :raise AssertionError: if SSO does not answer with a 302 redirect
    """
    self._url = url
    # Start from a clean cookie jar so a stale SSO session cannot interfere.
    self._session.cookies.clear()
    resp = self._session.post('https://sso.buaa.edu.cn/login', data=self._get_login_form(), allow_redirects=False)
    if resp.status_code != 302:
        # Raised explicitly rather than via `assert`, which is stripped under
        # `python -O`; the exception type is kept for existing callers.
        raise AssertionError('maybe your username or password is invalid')
    location = resp.headers['Location']
    logging.info('location: %s', location)
    return location
class TestApiCall(unittest.TestCase):
    """
    Integration tests that log in once and call the real API.

    They need valid credentials in the project config.
    """
    # Shared client, created once for the whole class in `setUpClass`.
    client = None

    @classmethod
    def setUpClass(cls) -> None:
        # Imported here rather than at module level, as in the original.
        from config import config
        cls.client = Client(config)
        cls.client.login()

    @classmethod
    def tearDownClass(cls) -> None:
        # Release the HTTP session opened in `setUpClass`; without this the
        # connection stays open until interpreter exit.
        if cls.client is not None and cls.client.session is not None:
            cls.client.logout()
        cls.client = None

    def test_get_user_profile(self):
        rtv = self.client.get_user_profile()
        self.assertGreater(rtv['id'], 0)

    def test_query_student_semester_course_by_page(self):
        rtv = self.client.query_student_semester_course_by_page(1, 10)
        self.assertEqual(rtv['size'], 10)