├── .gitignore ├── README.md ├── config.sample.ini ├── main.py └── network ├── client.py ├── const.py ├── epe.py └── iaaa.py /.gitignore: -------------------------------------------------------------------------------- 1 | config.ini 2 | *__pycache__* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 反爬机制 2 | 3 | ### Login 阶段 4 | 5 | - 拿到IAAA token 6 | - 用IAAA token请求体质健康网站ggtypt/dologin,该网站将向cookie写入sso_pku_token 7 | - 向智慧场馆 venue-server/api/login 携带 sso_pku_token 请求,返回json中包含cgAuthorization字段 8 | - 用上一步的 cgAuthorization 加上个人身份,请求智慧场馆 venue-server/roleLogin,返回最终的cgAuthorization,此为查询场地、预订所需的身份证明。 9 | 10 | ### sign 验证 11 | 12 | 智慧场馆使用额外的sign防止爬虫,例如查询场地阶段需要url参数: 13 | 14 | ``` 15 | params = { 16 | venueSiteId: 60 17 | searchDate: 2021-10-01 18 | nocache: 1xxxxxxxxxxxxxx # 时间戳 19 | } 20 | ``` 21 | 22 | 则 23 | 24 | ``` 25 | sign = MD5(S + 当前路径(/api/reservation/day/info)+ params 按字典序排列 + 时间戳 + “ ” + S) 26 | ``` 27 | 28 | 其中有magic number S,**为防止爬虫滥用,本程序中填入了错误的字符串S,会导致401 Unauthorized错误,请填入正确的S后使用** 29 | 30 | ## 使用方法 31 | config.sample.ini中填入个人信息、参数设置,修改文件名为config.ini后,python运行main.py 32 | 33 | ## Known Issues 34 | - 命令行使用SOCKS代理可能会造成问题,HTTP/HTTPS则无问题(不确定原因) 35 | 36 | ## 声明 37 | 38 | 本脚本为喜欢运动、约不上场、且懂点儿技术的同学节省一点儿开发时间,希望各位用户在可控范围内传播,避免爬虫内卷,☆⌒(*^-゜)v THX!! 39 | 40 | 受[PKUAutoElective](https://github.com/zhongxinghong/PKUAutoElective)启发并复用了部分源码。 41 | -------------------------------------------------------------------------------- /config.sample.ini: -------------------------------------------------------------------------------- 1 | [user] 2 | # 学号,统一认证密码,手机号(订场信息需要) 3 | user_id = 1x000xxxxx 4 | passwd = ************** 5 | phoneNumber = ********** 6 | 7 | [target] 8 | date = 2021-10-06 9 | 10 | gym = 60 11 | # 例:qdb羽毛球场=60, 五四羽毛球场=86,其余在智慧场馆url自查 12 | 13 | prefer_site = [10, 11] 14 | # 1号场序号为0,排在前面的会优先刷上 15 | 16 | prefer_time = [2, 13] 17 | # 8:00开始的一场记为0, 以此类推 18 | 19 | interval = 1 20 | # 约从该时刻开始的几小时 21 | 22 | [loop] 23 | ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36" 24 | sleepTime = 2.0 25 | sleepTimeIfNotAvail = 20.0 26 | 27 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from network.iaaa import IAAAClient 2 | from network.epe import EpeClient 3 | import time 4 | import random 5 | import configparser 6 | 7 | config = configparser.ConfigParser() 8 | config.read("config.ini") 9 | 10 | ua = config["loop"]["ua"] 11 | prefer_site = eval(config["target"]["prefer_site"]) 12 | prefer_time = eval(config["target"]["prefer_time"]) 13 | game_interval = int(config["target"]["interval"]) 14 | slt = float(config["loop"]["sleepTime"]) 15 | sltNA = float(config["loop"]["sleepTimeIfNotAvail"]) 16 | 17 | def asciiTime(): 18 | return time.asctime( time.localtime(time.time()) ) 19 | 20 | def searchForAppropSite(r, date, ts, sites, length): 21 | data = r.json()["data"] 22 | timeMap = data["spaceTimeInfo"] 23 | spaceMap = data["reservationDateSpaceInfo"][date] 24 | timeId = [] 25 | avails = [] 26 | for t in ts: 27 | timeId.append(timeMap[t]["id"]) 28 | for tId in timeId: 29 | for s in sites: 30 | ls = spaceMap[s] 31 | if ls[str(tId)]["reservationStatus"] == 1: 32 | flag = True 33 | for i in range(1, length): 34 | if ls[str(tId + i)]["reservationStatus"] != 1: 35 | flag = False 36 | if flag: 37 | tIds = [str(i) for i in range(tId, tId+length)] 38 | avails.append((ls["id"], tIds)) 39 | return avails 40 | 41 | 42 | # Epe login to get cgAuthorization 43 | epe = EpeClient(1, timeout=1000) 44 | epe.set_user_agent(ua) 45 | r = epe.redirectVenue() 46 | 47 | iaaa = IAAAClient(timeout=30) 48 | iaaa.set_user_agent(ua) 49 | r1 = iaaa.oauth_home() 50 | r1 = iaaa.oauth_login(config["user"]["user_id"], config["user"]["passwd"]) 51 | # print("token: ", r1.json()) 52 | try: 53 | if not "token" in r1.json(): 54 | raise Exception("get token error") 55 | token = r1.json()["token"] 56 | except Exception as e: 57 | print(r1) 58 | 59 | r2 = epe.get_ticket(token) 60 | sso_pku_token = epe._session.cookies.get_dict()["sso_pku_token"] 61 | # print("sso pku token ", sso_pku_token) 62 | r3 = epe.beforeRoleLogin(sso_pku_token) 63 | access_token = r3.json()["data"]["token"]["access_token"] 64 | 65 | r = epe.roleLogin(access_token) 66 | 67 | cgAuth = r.json()["data"]["token"]["access_token"] 68 | 69 | 70 | ## start looking for available fields 71 | while True: 72 | r = epe.infoLookup(cgAuth, (config["target"]["gym"], config["target"]["date"])) 73 | try: 74 | if r.json()["message"] != "OK": 75 | raise Exception(r.text) 76 | except Exception as e: 77 | print(asciiTime(), ": ", e) 78 | time.sleep(slt + random.random()) 79 | continue 80 | avails = searchForAppropSite(r, config["target"]["date"], prefer_time, prefer_site, game_interval) 81 | if len(avails) == 0: 82 | print(asciiTime(), " ", "所选时段暂无可用场地") 83 | time.sleep(sltNA + random.random()) 84 | continue 85 | 86 | for x in avails: 87 | try: 88 | orderLs = [] 89 | spaceId = str(x[0]) 90 | for tId in x[1]: 91 | orderLs.append( 92 | { 93 | "spaceId": spaceId, 94 | "timeId": tId, 95 | "venueSpaceGroupId":None 96 | } 97 | ) 98 | r2 = epe.makeOrder(cgAuth, [config["target"]["gym"], config["target"]["date"]], orderLs) 99 | if r2.json()["message"] != "OK": 100 | raise Exception(r2.text) 101 | time.sleep(0.5) 102 | continue 103 | 104 | r3 = epe.submit(cgAuth, [config["target"]["gym"], config["target"]["date"]], orderLs, config["user"]["phonenumber"]) 105 | if r3.json()["message"] != "OK": 106 | raise Exception(r3.text) 107 | time.sleep(0.5) 108 | continue 109 | print("预订成功!记得去智慧场馆付款\n", r3.text) 110 | break 111 | except Exception as e: 112 | print(asciiTime(), " ", e) 113 | continue 114 | break 115 | 116 | 117 | epe.logout() 118 | -------------------------------------------------------------------------------- /network/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: client.py 4 | # modified: 2019-09-09 5 | 6 | from requests.models import Request 7 | from requests.sessions import Session 8 | from requests.cookies import extract_cookies_to_jar 9 | 10 | class BaseClient(object): 11 | 12 | default_headers = {} 13 | default_client_timeout = 10 14 | 15 | def __init__(self, *args, **kwargs): 16 | if self.__class__ is __class__: 17 | raise NotImplementedError 18 | self._timeout = kwargs.get("timeout", self.__class__.default_client_timeout) 19 | self._session = Session() 20 | self._session.headers.update(self.__class__.default_headers) 21 | 22 | @property 23 | def user_agent(self): 24 | return self._session.headers.get('User-Agent') 25 | 26 | def _request(self, method, url, 27 | params=None, data=None, headers=None, cookies=None, files=None, 28 | auth=None, timeout=None, allow_redirects=True, proxies=None, 29 | hooks=None, stream=None, verify=None, cert=None, json=None): 30 | 31 | # Extended from requests/sessions.py for '_client' kwargs 32 | 33 | req = Request( 34 | method=method.upper(), 35 | url=url, 36 | headers=headers, 37 | files=files, 38 | data=data or {}, 39 | json=json, 40 | params=params or {}, 41 | auth=auth, 42 | cookies=cookies, 43 | hooks=hooks, 44 | ) 45 | prep = self._session.prepare_request(req) 46 | prep._client = self # hold the reference to client 47 | 48 | 49 | proxies = proxies or {} 50 | 51 | settings = self._session.merge_environment_settings( 52 | prep.url, proxies, stream, verify, cert 53 | ) 54 | 55 | # Send the request. 56 | send_kwargs = { 57 | 'timeout': timeout or self._timeout, # set default timeout 58 | 'allow_redirects': allow_redirects, 59 | } 60 | send_kwargs.update(settings) 61 | resp = self._session.send(prep, **send_kwargs) 62 | 63 | return resp 64 | 65 | def _get(self, url, params=None, **kwargs): 66 | return self._request('GET', url, params=params, **kwargs) 67 | 68 | def _post(self, url, data=None, json=None, **kwargs): 69 | return self._request('POST', url, data=data, json=json, **kwargs) 70 | 71 | def set_user_agent(self, user_agent): 72 | self._session.headers["User-Agent"] = user_agent 73 | 74 | def persist_cookies(self, r): 75 | """ 76 | From requests/sessions.py, Session.send() 77 | 78 | Session.send() 方法会首先 dispatch_hook 然后再 extract_cookies_to_jar 79 | 80 | 在该项目中,对于返回信息异常的请求,在 hooks 校验时会将错误抛出,send() 之后的处理将不会执行。 81 | 遇到的错误往往是 SystemException / TipsException ,而这些客户端认为是错误的情况, 82 | 对于服务器端来说并不是错误请求,服务器端在该次请求结束后可能会要求 Set-Cookies 83 | 但是由于 send() 在 dispatch_hook 时遇到错误而中止,导致后面的 extract_cookies_to_jar 84 | 未能调用,因此 Cookies 并未更新。下一次再请求服务器的时候,就会遇到会话过期的情况。 85 | 86 | 在这种情况下,需要在捕获错误后手动更新 cookies 以确保能够保持会话 87 | 88 | """ 89 | if r.history: 90 | 91 | # If the hooks create history then we want those cookies too 92 | for resp in r.history: 93 | extract_cookies_to_jar(self._session.cookies, resp.request, resp.raw) 94 | 95 | extract_cookies_to_jar(self._session.cookies, r.request, r.raw) 96 | 97 | def clear_cookies(self): 98 | self._session.cookies.clear() 99 | -------------------------------------------------------------------------------- /network/const.py: -------------------------------------------------------------------------------- 1 | class IAAAURL(object): 2 | Host = "iaaa.pku.edu.cn" 3 | OauthHomePage = "https://iaaa.pku.edu.cn/iaaa/oauth.jsp" 4 | OauthLogin = "https://iaaa.pku.edu.cn/iaaa/oauthlogin.do" 5 | 6 | 7 | class EpeURL(object): 8 | Scheme = "https" 9 | Host = "epe.pku.edu.cn" 10 | HomePage = "https://epe.pku.edu.cn" 11 | ggyptLogin = "https://epe.pku.edu.cn/ggtypt/login" 12 | SSOLoginRedirect = "https://epe.pku.edu.cn/ggtypt/dologin" 13 | SSOLogin = "https://epe.pku.edu.cn/ggtypt/dologin" 14 | venueLogin = "https://epe.pku.edu.cn/venue-server/loginto" 15 | beforeRoleLogin = "https://epe.pku.edu.cn/venue-server/api/login" 16 | RoleLogin = "https://epe.pku.edu.cn/venue-server/roleLogin" 17 | # qdbField = "https://epe.pku.edu.cn/venue/pku/venue-reservation/60" 18 | Logout = "https://epe.pku.edu.cn/ggtypt/logout" 19 | ReservationInfo = "https://epe.pku.edu.cn/venue-server/api/reservation/day/info" 20 | orderInfo = "https://epe.pku.edu.cn/venue-server/api/reservation/order/info" 21 | oderSubmit = "https://epe.pku.edu.cn/venue-server/api/reservation/order/submit" 22 | appKey = "8fceb735082b5a529312040b58ea780b" 23 | S = "c640ca392cd45fb3a55b02143a86c618" 24 | -------------------------------------------------------------------------------- /network/epe.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import string 4 | import random 5 | import hashlib 6 | from .client import BaseClient 7 | from .const import IAAAURL, EpeURL 8 | 9 | 10 | def calcSign(path, timestamp, params): 11 | S = EpeURL.S 12 | result = S + path 13 | for key in sorted(params): 14 | result = result + key 15 | result = result + params[key] 16 | result += timestamp + " " + S 17 | return result 18 | 19 | 20 | class EpeClient(BaseClient): 21 | 22 | def __init__(self, id, **kwargs): 23 | super().__init__(**kwargs) 24 | self._id = id 25 | self._expired_time = -1 26 | 27 | @property 28 | def id(self): 29 | return self._id 30 | 31 | @property 32 | def expired_time(self): 33 | return self._expired_time 34 | 35 | @property 36 | def is_expired(self): 37 | if self._expired_time == -1: 38 | return False 39 | return int(time.time()) > self._expired_time 40 | 41 | @property 42 | def has_logined(self): 43 | return len(self._session.cookies) > 0 44 | 45 | def set_expired_time(self, expired_time): 46 | self._expired_time = expired_time 47 | 48 | 49 | def redirectVenue(self, **kwargs): 50 | headers = kwargs.pop("headers", {}) 51 | r = self._get( 52 | url=EpeURL.ggyptLogin, 53 | params={ 54 | "service": EpeURL.venueLogin, 55 | }, 56 | headers=headers, 57 | # hooks=_hooks_check_title, 58 | **kwargs, 59 | ) 60 | return r 61 | 62 | def beforeRoleLogin(self, sso_token, **kwargs): 63 | headers = kwargs.pop("headers", {}) 64 | timestamp = str(int(time.time())) 65 | headers["sso-token"] = sso_token 66 | sign = calcSign("/api/login", timestamp, {}) 67 | # print(sign) 68 | headers["sign"] = hashlib.md5(sign.encode()).hexdigest() 69 | headers["timestamp"] = timestamp 70 | headers["app-key"] = EpeURL.appKey 71 | 72 | r = self._post( 73 | url=EpeURL.beforeRoleLogin, 74 | headers=headers, 75 | # hooks=_hooks_check_title, 76 | **kwargs, 77 | ) 78 | return r 79 | 80 | def roleLogin(self, acc_token, **kwargs): 81 | headers = kwargs.pop("headers", {}) 82 | timestamp = str(int(time.time())) 83 | headers["cgAuthorization"] = acc_token 84 | params = { 85 | "roleid": "3", 86 | } 87 | sign = calcSign("/roleLogin", timestamp, params) 88 | # print(sign) 89 | headers["sign"] = hashlib.md5(sign.encode()).hexdigest() 90 | headers["timestamp"] = timestamp 91 | headers["app-key"] = EpeURL.appKey 92 | 93 | r = self._post( 94 | url=EpeURL.RoleLogin, 95 | headers=headers, 96 | params = params, 97 | # hooks=_hooks_check_title, 98 | **kwargs, 99 | ) 100 | return r 101 | 102 | def infoLookup(self, cgAuth, info, **kwargs): 103 | headers = kwargs.pop("headers", {}) 104 | timestamp = str(int(time.time())) 105 | headers["cgAuthorization"] = cgAuth 106 | params = { 107 | "venueSiteId": info[0], 108 | "searchDate": info[1], 109 | "nocache":timestamp 110 | } 111 | sign = calcSign("/api/reservation/day/info", timestamp, params) 112 | # print(sign) 113 | headers["sign"] = hashlib.md5(sign.encode()).hexdigest() 114 | headers["timestamp"] = timestamp 115 | headers["app-key"] = EpeURL.appKey 116 | 117 | r = self._get( 118 | url=EpeURL.ReservationInfo, 119 | headers=headers, 120 | params = params, 121 | # hooks=_hooks_check_title, 122 | **kwargs, 123 | ) 124 | return r 125 | 126 | def makeOrder(self, cgAuth, info, orderLs, **kwargs): 127 | headers = kwargs.pop("headers", {}) 128 | timestamp = str(int(time.time())) 129 | headers["cgAuthorization"] = cgAuth 130 | params = { 131 | "venueSiteId": info[0], 132 | "reservationDate": info[1], 133 | "weekStartDate": info[1], 134 | "reservationOrderJson": json.dumps(orderLs) 135 | } 136 | sign = calcSign("/api/reservation/order/info", timestamp, params) 137 | # print(sign) 138 | headers["sign"] = hashlib.md5(sign.encode()).hexdigest() 139 | headers["timestamp"] = timestamp 140 | headers["app-key"] = EpeURL.appKey 141 | 142 | r = self._post( 143 | url=EpeURL.orderInfo, 144 | headers=headers, 145 | params = params, 146 | # hooks=_hooks_check_title, 147 | **kwargs, 148 | ) 149 | return r 150 | 151 | def submit(self, cgAuth, info, orderLs, phone, **kwargs): 152 | headers = kwargs.pop("headers", {}) 153 | timestamp = str(int(time.time())) 154 | headers["cgAuthorization"] = cgAuth 155 | params = { 156 | "venueSiteId": info[0], 157 | "reservationDate": info[1], 158 | "weekStartDate": info[1], 159 | "reservationOrderJson": json.dumps( 160 | orderLs 161 | ), 162 | "phone": phone, 163 | "isOfflineTicket": "1", 164 | } 165 | sign = calcSign("/api/reservation/order/submit", timestamp, params) 166 | # print(sign) 167 | headers["sign"] = hashlib.md5(sign.encode()).hexdigest() 168 | headers["timestamp"] = timestamp 169 | headers["app-key"] = EpeURL.appKey 170 | 171 | r = self._post( 172 | url=EpeURL.oderSubmit, 173 | headers=headers, 174 | params = params, 175 | # hooks=_hooks_check_title, 176 | **kwargs, 177 | ) 178 | return r 179 | 180 | 181 | def logout(self, **kwargs): 182 | headers = kwargs.pop("headers", {}) 183 | # headers = _get_headers_with_referer(kwargs) 184 | r = self._get( 185 | url=EpeURL.Logout, 186 | params={ 187 | "service": "https://epe.pku.edu.cn/venue", 188 | }, 189 | headers=headers, 190 | # hooks=_hooks_check_title, 191 | **kwargs, 192 | ) 193 | # print("actual header", r.headers) 194 | return r 195 | 196 | def get_ticket(self, token, **kwargs): 197 | headers = kwargs.pop("headers", {}) 198 | r = self._get( 199 | url = EpeURL.SSOLogin, 200 | params = { 201 | "_rand": str(random.random()), 202 | "token": token, 203 | }, 204 | headers = headers, 205 | **kwargs 206 | ) 207 | # print("ticket header", headers) 208 | return r -------------------------------------------------------------------------------- /network/iaaa.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: iaaa.py 4 | # modified: 2019-09-10 5 | 6 | from urllib.parse import quote 7 | from .client import BaseClient 8 | # from .hook import get_hooks, debug_print_request, check_status_code, check_iaaa_success 9 | from .const import IAAAURL, EpeURL 10 | 11 | # _hooks_check_status_code = get_hooks( 12 | # debug_print_request, 13 | # check_status_code, 14 | # ) 15 | 16 | # _hooks_check_iaaa_success = get_hooks( 17 | # debug_print_request, 18 | # check_status_code, 19 | # check_iaaa_success, 20 | # ) 21 | 22 | 23 | class IAAAClient(BaseClient): 24 | 25 | # default_headers = { 26 | # "Accept": "application/json, text/javascript, */*; q=0.01", 27 | # "Accept-Encoding": "gzip, deflate, br", 28 | # "Accept-Language": "en-US,en;q=0.9", 29 | # "Host": IAAAURL.Host, 30 | # "Origin": "https://%s" % IAAAURL.Host, 31 | # "Connection": "keep-alive", 32 | # } 33 | 34 | def oauth_home(self, **kwargs): 35 | headers = kwargs.pop("headers", {}) 36 | headers["Referer"] = EpeURL.HomePage 37 | headers["Upgrade-Insecure-Requests"] = "1" 38 | 39 | r = self._post( 40 | url=IAAAURL.OauthHomePage, 41 | params={ 42 | "appID": "ty", 43 | "appName": "北京大学体测系统", 44 | "redirectUrl": EpeURL.SSOLoginRedirect, 45 | "redirectLogonUrl": EpeURL.SSOLoginRedirect, 46 | }, 47 | headers=headers, 48 | # hooks=_hooks_check_status_code, 49 | **kwargs, 50 | ) 51 | return r 52 | 53 | def oauth_login(self, username, password, **kwargs): 54 | headers = kwargs.pop("headers", {}) 55 | headers["Referer"] = IAAAURL.OauthHomePage 56 | # headers["Referer"] = "%s?appID=syllabus&appName=%s&redirectUrl=%s" % ( 57 | # IAAAURL.OauthHomePage, quote("学生选课系统"), ElectiveURL.SSOLoginRedirect) 58 | headers["X-Requested-With"] = "XMLHttpRequest" 59 | r = self._post( 60 | url=IAAAURL.OauthLogin, 61 | data={ 62 | "appid": "ty", 63 | "userName": username, 64 | "password": password, 65 | "randCode": "", 66 | "smsCode": "", 67 | "otpCode": "", 68 | "redirUrl": EpeURL.SSOLoginRedirect, 69 | }, 70 | headers=headers, 71 | # hooks=_hooks_check_iaaa_success, 72 | **kwargs, 73 | ) 74 | # print(headers) 75 | return r 76 | --------------------------------------------------------------------------------